Merge branch 'master' of ../mesa into vulkan
author	Kristian Høgsberg Kristensen <kristian.h.kristensen@intel.com>
Wed, 30 Sep 2015 00:10:50 +0000 (17:10 -0700)
committer	Kristian Høgsberg Kristensen <kristian.h.kristensen@intel.com>
Thu, 1 Oct 2015 21:24:29 +0000 (14:24 -0700)
37 files changed:
configure.ac
src/Makefile.am
src/glsl/Makefile.am
src/glsl/Makefile.sources
src/glsl/ast_to_hir.cpp
src/glsl/glsl_parser_extras.cpp
src/glsl/glsl_types.cpp
src/glsl/glsl_types.h
src/glsl/ir_clone.cpp
src/glsl/link_uniform_initializers.cpp
src/glsl/nir/glsl_to_nir.cpp
src/glsl/nir/nir.h
src/glsl/nir/nir_builder.h
src/glsl/nir/nir_intrinsics.h
src/glsl/nir/nir_lower_samplers.c
src/glsl/nir/nir_opt_algebraic.py
src/glsl/nir/nir_split_var_copies.c
src/glsl/nir/nir_types.cpp
src/glsl/nir/nir_types.h
src/glsl/standalone_scaffolding.cpp
src/mesa/drivers/dri/i965/brw_context.h
src/mesa/drivers/dri/i965/brw_defines.h
src/mesa/drivers/dri/i965/brw_device_info.c
src/mesa/drivers/dri/i965/brw_fs.cpp
src/mesa/drivers/dri/i965/brw_fs_nir.cpp
src/mesa/drivers/dri/i965/brw_gs.c
src/mesa/drivers/dri/i965/brw_nir.c
src/mesa/drivers/dri/i965/brw_program.c
src/mesa/drivers/dri/i965/brw_shader.cpp
src/mesa/drivers/dri/i965/brw_vec4.cpp
src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
src/mesa/drivers/dri/i965/brw_wm.c
src/mesa/main/mtypes.h
src/mesa/program/ir_to_mesa.cpp
src/vulkan/anv_compiler.cpp
src/vulkan/anv_nir_apply_dynamic_offsets.c

diff --combined configure.ac
index 4669a67dc04709213453ce17d0745b1df1fc0208,217281f79507a449bdc94a15b7ed0751c09f41ef..fa64dab4e48c0fa9865da81eaf46e8bef642797f
@@@ -74,7 -74,7 +74,7 @@@ LIBDRM_AMDGPU_REQUIRED=2.4.6
  LIBDRM_INTEL_REQUIRED=2.4.61
  LIBDRM_NVVIEUX_REQUIRED=2.4.33
  LIBDRM_NOUVEAU_REQUIRED=2.4.62
- LIBDRM_FREEDRENO_REQUIRED=2.4.64
+ LIBDRM_FREEDRENO_REQUIRED=2.4.65
  DRI2PROTO_REQUIRED=2.6
  DRI3PROTO_REQUIRED=1.0
  PRESENTPROTO_REQUIRED=1.0
@@@ -99,7 -99,6 +99,7 @@@ AM_PROG_CC_C_
  AM_PROG_AS
  AX_CHECK_GNU_MAKE
  AC_CHECK_PROGS([PYTHON2], [python2 python])
 +AC_CHECK_PROGS([PYTHON3], [python3])
  AC_PROG_SED
  AC_PROG_MKDIR_P
  
@@@ -534,15 -533,32 +534,32 @@@ AM_CONDITIONAL(HAVE_COMPAT_SYMLINKS, te
  dnl
  dnl library names
  dnl
+ dnl Unfortunately we need to do a few things that libtool can't help us with,
+ dnl so we need some knowledge of shared library filenames:
+ dnl
+ dnl LIB_EXT is the extension used when creating symlinks for alternate
+ dnl filenames for a shared library which will be dynamically loaded
+ dnl
+ dnl IMP_LIB_EXT is the extension used when checking for the presence of a
+ dnl the file for a shared library we wish to link with
+ dnl
  case "$host_os" in
  darwin* )
-     LIB_EXT='dylib' ;;
+     LIB_EXT='dylib'
+     IMP_LIB_EXT=$LIB_EXT
+     ;;
  cygwin* )
-     LIB_EXT='dll' ;;
+     LIB_EXT='dll'
+     IMP_LIB_EXT='dll.a'
+     ;;
  aix* )
-     LIB_EXT='a' ;;
+     LIB_EXT='a'
+     IMP_LIB_EXT=$LIB_EXT
+     ;;
  * )
-     LIB_EXT='so' ;;
+     LIB_EXT='so'
+     IMP_LIB_EXT=$LIB_EXT
+     ;;
  esac
  
  AC_SUBST([LIB_EXT])
@@@ -1111,6 -1127,11 +1128,11 @@@ AC_MSG_RESULT([$with_sha1]
  AC_SUBST(SHA1_LIBS)
  AC_SUBST(SHA1_CFLAGS)
  
+ # Enable a define for SHA1
+ if test "x$with_sha1" != "x"; then
+       DEFINES="$DEFINES -DHAVE_SHA1"
+ fi
  # Allow user to configure out the shader-cache feature
  AC_ARG_ENABLE([shader-cache],
      AS_HELP_STRING([--disable-shader-cache], [Disable binary shader cache]),
@@@ -1290,6 -1311,16 +1312,16 @@@ AC_SUBST(GLX_TLS, ${GLX_USE_TLS}
  AS_IF([test "x$GLX_USE_TLS" = xyes -a "x$ax_pthread_ok" = xyes],
        [DEFINES="${DEFINES} -DGLX_USE_TLS"])
  
+ dnl Read-only text section on x86 hardened platforms
+ AC_ARG_ENABLE([glx-read-only-text],
+     [AS_HELP_STRING([--enable-glx-read-only-text],
+         [Disable writable .text section on x86 (decreases performance) @<:@default=disabled@:>@])],
+     [enable_glx_read_only_text="$enableval"],
+     [enable_glx_read_only_text=no])
+ if test "x$enable_glx_read_only_text" = xyes; then
+     DEFINES="$DEFINES -DGLX_X86_READONLY_TEXT"
+ fi
  dnl
  dnl More DRI setup
  dnl
@@@ -1518,12 -1549,6 +1550,12 @@@ GBM_PC_LIB_PRIV="$DLOPEN_LIBS
  AC_SUBST([GBM_PC_REQ_PRIV])
  AC_SUBST([GBM_PC_LIB_PRIV])
  
 +AM_CONDITIONAL(HAVE_VULKAN, true)
 +
 +AC_ARG_VAR([GLSLC], [Path to the glslc executable])
 +AC_CHECK_PROGS([GLSLC], [glslc])
 +AC_SUBST([GLSLC])
 +
  dnl
  dnl EGL configuration
  dnl
@@@ -2058,7 -2083,7 +2090,7 @@@ radeon_llvm_check() 
      if test "x$enable_gallium_llvm" != "xyes"; then
          AC_MSG_ERROR([--enable-gallium-llvm is required when building $1])
      fi
-     llvm_check_version_for "3" "4" "2" $1 
+     llvm_check_version_for "3" "5" "0" $1
      if test true && $LLVM_CONFIG --targets-built | grep -iqvw $amdgpu_llvm_target_name ; then
          AC_MSG_ERROR([LLVM $amdgpu_llvm_target_name not enabled in your LLVM build.])
      fi
@@@ -2146,11 -2171,8 +2178,8 @@@ if test -n "$with_gallium_drivers"; the
              gallium_require_drm "vc4"
              gallium_require_drm_loader
  
-             case "$host_cpu" in
-                 i?86 | x86_64 | amd64)
-                 USE_VC4_SIMULATOR=yes
-                 ;;
-             esac
+             PKG_CHECK_MODULES([SIMPENROSE], [simpenrose],
+                               [USE_VC4_SIMULATOR=yes], [USE_VC4_SIMULATOR=no])
              ;;
          *)
              AC_MSG_ERROR([Unknown Gallium driver: $driver])
@@@ -2170,10 -2192,14 +2199,14 @@@ if test "x$MESA_LLVM" != x0; the
  
      LLVM_LIBS="`$LLVM_CONFIG --libs ${LLVM_COMPONENTS}`"
  
+     dnl llvm-config may not give the right answer when llvm is a built as a
+     dnl single shared library, so we must work the library name out for
+     dnl ourselves.
+     dnl (See https://llvm.org/bugs/show_bug.cgi?id=6823)
      if test "x$enable_llvm_shared_libs" = xyes; then
          dnl We can't use $LLVM_VERSION because it has 'svn' stripped out,
          LLVM_SO_NAME=LLVM-`$LLVM_CONFIG --version`
-         AS_IF([test -f "$LLVM_LIBDIR/lib$LLVM_SO_NAME.so"], [llvm_have_one_so=yes])
+         AS_IF([test -f "$LLVM_LIBDIR/lib$LLVM_SO_NAME.$IMP_LIB_EXT"], [llvm_have_one_so=yes])
  
          if test "x$llvm_have_one_so" = xyes; then
              dnl LLVM was built using auto*, so there is only one shared object.
          else
              dnl If LLVM was built with CMake, there will be one shared object per
              dnl component.
-             AS_IF([test ! -f "$LLVM_LIBDIR/libLLVMTarget.so"],
+             AS_IF([test ! -f "$LLVM_LIBDIR/libLLVMTarget.$IMP_LIB_EXT"],
                      [AC_MSG_ERROR([Could not find llvm shared libraries:
        Please make sure you have built llvm with the --enable-shared option
        and that your llvm libraries are installed in $LLVM_LIBDIR
@@@ -2298,13 -2324,6 +2331,13 @@@ AC_SUBST([XA_MINOR], $XA_MINOR
  AC_SUBST([XA_TINY], $XA_TINY)
  AC_SUBST([XA_VERSION], "$XA_MAJOR.$XA_MINOR.$XA_TINY")
  
 +PKG_CHECK_MODULES(VALGRIND, [valgrind],
 +                  [have_valgrind=yes], [have_valgrind=no])
 +if test "x$have_valgrind" = "xyes"; then
 +    AC_DEFINE([HAVE_VALGRIND], 1,
 +              [Use valgrind intrinsics to suppress false warnings])
 +fi
 +
  dnl Restore LDFLAGS and CPPFLAGS
  LDFLAGS="$_SAVE_LDFLAGS"
  CPPFLAGS="$_SAVE_CPPFLAGS"
@@@ -2414,9 -2433,6 +2447,9 @@@ AC_CONFIG_FILES([Makefil
                src/mesa/drivers/osmesa/osmesa.pc
                src/mesa/drivers/x11/Makefile
                src/mesa/main/tests/Makefile
 +              src/vulkan/Makefile
 +              src/vulkan/anv_icd.json
 +              src/vulkan/tests/Makefile
                src/util/Makefile
                src/util/tests/hash_table/Makefile])
  
@@@ -2535,7 -2551,6 +2568,7 @@@ if test "x$MESA_LLVM" = x1; the
      echo ""
  fi
  echo "        PYTHON2:         $PYTHON2"
 +echo "        PYTHON3:         $PYTHON3"
  
  echo ""
  echo "        Run '${MAKE-make}' to build Mesa"
diff --combined src/Makefile.am
index da638a811fbd2eec25c1814257071b3300b04730,9e15cca5ea4a3c9dde9a73e437a30de90cb9b8e1..13cfaa5b367b97d422f44f52ba4a8e5092480df4
@@@ -53,10 -53,6 +53,10 @@@ EXTRA_DIST = 
  AM_CFLAGS = $(VISIBILITY_CFLAGS)
  AM_CXXFLAGS = $(VISIBILITY_CXXFLAGS)
  
 +if HAVE_VULKAN
 +SUBDIRS += vulkan
 +endif
 +
  AM_CPPFLAGS = \
        -I$(top_srcdir)/include/ \
        -I$(top_srcdir)/src/mapi/ \
@@@ -66,6 -62,7 +66,7 @@@
  noinst_LTLIBRARIES = libglsl_util.la
  
  libglsl_util_la_SOURCES = \
+       glsl/shader_enums.c \
        mesa/main/imports.c \
        mesa/program/prog_hash_table.c \
        mesa/program/symbol_table.c \
diff --combined src/glsl/Makefile.am
index 2ab40506e979c04370568b34a57500cffe69a90b,32653911f6c07e7003698bad77506f2201eeb77c..08368311b8a8535439533b1da465a0b846bac41e
@@@ -50,12 -50,14 +50,14 @@@ EXTRA_DIST = tests glcpp/tests README T
        nir/nir_opcodes_c.py                            \
        nir/nir_opcodes_h.py                            \
        nir/nir_opt_algebraic.py                        \
+       nir/tests                                       \
        SConscript
  
  include Makefile.sources
  
  TESTS = glcpp/tests/glcpp-test                                \
        glcpp/tests/glcpp-test-cr-lf                    \
+         nir/tests/control_flow_tests                  \
        tests/blob-test                                 \
        tests/general-ir-test                           \
        tests/optimization-test                         \
@@@ -70,12 -72,13 +72,13 @@@ noinst_LTLIBRARIES = libnir.la libglsl.
  check_PROGRAMS =                                      \
        glcpp/glcpp                                     \
        glsl_test                                       \
+       nir/tests/control_flow_tests                    \
        tests/blob-test                                 \
        tests/general-ir-test                           \
        tests/sampler-types-test                        \
        tests/uniform-initializer-test
  
 -noinst_PROGRAMS = glsl_compiler
 +noinst_PROGRAMS = glsl_compiler spirv2nir
  
  tests_blob_test_SOURCES =                             \
        tests/blob_test.c
@@@ -140,13 -143,16 +143,16 @@@ libglsl_la_SOURCES =                                    
        glsl_parser.cpp                                 \
        glsl_parser.h                                   \
        $(LIBGLSL_FILES)                                \
-       $(NIR_FILES)
+       $(NIR_FILES)                                    \
+       $(NIR_GENERATED_FILES)
  
  libnir_la_SOURCES =                                   \
        glsl_types.cpp                                  \
        builtin_types.cpp                               \
        glsl_symbol_table.cpp                           \
-       $(NIR_FILES)
+       $(NIR_FILES)                                    \
+       $(NIR_GENERATED_FILES)
  
  glsl_compiler_SOURCES = \
        $(GLSL_COMPILER_CXX_FILES)
  glsl_compiler_LDADD =                                 \
        libglsl.la                                      \
        $(top_builddir)/src/libglsl_util.la             \
 +      $(PTHREAD_LIBS)
 +
 +spirv2nir_SOURCES = \
 +      standalone_scaffolding.cpp \
 +      standalone_scaffolding.h \
 +      nir/spirv2nir.c
 +
 +spirv2nir_LDADD =                                     \
 +      libglsl.la                                      \
 +      $(top_builddir)/src/libglsl_util.la             \
        $(PTHREAD_LIBS)
  
  glsl_test_SOURCES = \
@@@ -207,19 -203,23 +213,23 @@@ am__v_YACC_ = $(am__v_YACC_$(AM_DEFAULT
  am__v_YACC_0 = @echo "  YACC    " $@;
  am__v_YACC_1 =
  
+ MKDIR_GEN = $(AM_V_at)$(MKDIR_P) $(@D)
+ YACC_GEN = $(AM_V_YACC)$(YACC) $(YFLAGS)
+ LEX_GEN = $(AM_V_LEX)$(LEX) $(LFLAGS)
  glsl_parser.cpp glsl_parser.h: glsl_parser.yy
-       $(AM_V_YACC) $(YACC) $(YFLAGS) -o $@ -p "_mesa_glsl_" --defines=$(builddir)/glsl_parser.h $<
+       $(YACC_GEN) -o $@ -p "_mesa_glsl_" --defines=$(builddir)/glsl_parser.h $(srcdir)/glsl_parser.yy
  
  glsl_lexer.cpp: glsl_lexer.ll
-       $(AM_V_LEX) $(LEX) $(LFLAGS) -o $@ $<
+       $(LEX_GEN) -o $@ $(srcdir)/glsl_lexer.ll
  
  glcpp/glcpp-parse.c glcpp/glcpp-parse.h: glcpp/glcpp-parse.y
-       $(AM_V_at)$(MKDIR_P) glcpp
-       $(AM_V_YACC) $(YACC) $(YFLAGS) -o $@ -p "glcpp_parser_" --defines=$(builddir)/glcpp/glcpp-parse.h $<
+       $(MKDIR_GEN)
+       $(YACC_GEN) -o $@ -p "glcpp_parser_" --defines=$(builddir)/glcpp/glcpp-parse.h $(srcdir)/glcpp/glcpp-parse.y
  
  glcpp/glcpp-lex.c: glcpp/glcpp-lex.l
-       $(AM_V_at)$(MKDIR_P) glcpp
-       $(AM_V_LEX) $(LEX) $(LFLAGS) -o $@ $<
+       $(MKDIR_GEN)
+       $(LEX_GEN) -o $@ $(srcdir)/glcpp/glcpp-lex.l
  
  # Only the parsers (specifically the header files generated at the same time)
  # need to be in BUILT_SOURCES. Though if we list the parser headers YACC is
@@@ -232,11 -232,7 +242,7 @@@ BUILT_SOURCES =                                           
        glsl_lexer.cpp                                  \
        glcpp/glcpp-parse.c                             \
        glcpp/glcpp-lex.c                               \
-       nir/nir_builder_opcodes.h                               \
-       nir/nir_constant_expressions.c                  \
-       nir/nir_opcodes.c                               \
-       nir/nir_opcodes.h                               \
-       nir/nir_opt_algebraic.c
+       $(NIR_GENERATED_FILES)
  CLEANFILES =                                          \
        glcpp/glcpp-parse.h                             \
        glsl_parser.h                                   \
@@@ -249,22 -245,35 +255,35 @@@ dist-hook
        $(RM) glcpp/tests/*.out
        $(RM) glcpp/tests/subtest*/*.out
  
+ PYTHON_GEN = $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS)
  nir/nir_builder_opcodes.h: nir/nir_opcodes.py nir/nir_builder_opcodes_h.py
-       $(AM_V_at)$(MKDIR_P) nir
-       $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/nir/nir_builder_opcodes_h.py > $@
+       $(MKDIR_GEN)
+       $(PYTHON_GEN) $(srcdir)/nir/nir_builder_opcodes_h.py > $@
  
- nir/nir_constant_expressions.c: nir/nir_opcodes.py nir/nir_constant_expressions.py nir/nir_constant_expressions.h
-       $(AM_V_at)$(MKDIR_P) nir
-       $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/nir/nir_constant_expressions.py > $@
+ nir/nir_constant_expressions.c: nir/nir_opcodes.py nir/nir_constant_expressions.py
+       $(MKDIR_GEN)
+       $(PYTHON_GEN) $(srcdir)/nir/nir_constant_expressions.py > $@
  
  nir/nir_opcodes.h: nir/nir_opcodes.py nir/nir_opcodes_h.py
-       $(AM_V_at)$(MKDIR_P) nir
-       $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/nir/nir_opcodes_h.py > $@
+       $(MKDIR_GEN)
+       $(PYTHON_GEN) $(srcdir)/nir/nir_opcodes_h.py > $@
  
  nir/nir_opcodes.c: nir/nir_opcodes.py nir/nir_opcodes_c.py
-       $(AM_V_at)$(MKDIR_P) nir
-       $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/nir/nir_opcodes_c.py > $@
+       $(MKDIR_GEN)
+       $(PYTHON_GEN) $(srcdir)/nir/nir_opcodes_c.py > $@
  
  nir/nir_opt_algebraic.c: nir/nir_opt_algebraic.py nir/nir_algebraic.py
-       $(AM_V_at)$(MKDIR_P) nir
-       $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/nir/nir_opt_algebraic.py > $@
+       $(MKDIR_GEN)
+       $(PYTHON_GEN) $(srcdir)/nir/nir_opt_algebraic.py > $@
+ nir_tests_control_flow_tests_SOURCES =                        \
+       nir/tests/control_flow_tests.cpp
+ nir_tests_control_flow_tests_CFLAGS =                 \
+       $(PTHREAD_CFLAGS)
+ nir_tests_control_flow_tests_LDADD =                  \
+       $(top_builddir)/src/gtest/libgtest.la           \
+       $(top_builddir)/src/glsl/libnir.la              \
+       $(top_builddir)/src/libglsl_util.la             \
+       $(top_builddir)/src/util/libmesautil.la         \
+       $(PTHREAD_LIBS)
diff --combined src/glsl/Makefile.sources
index acd13ef7745e20efef62245252a185c2f8fcb768,32b6dba2e910987b5126f079a5c8c537f302ea02..0cd3d2847564bab16d7d86975c139d898d83ccc6
@@@ -30,31 -30,37 +30,37 @@@ NIR_FILES = 
        nir/nir_control_flow_private.h \
        nir/nir_dominance.c \
        nir/nir_from_ssa.c \
+       nir/nir_gs_count_vertices.c \
        nir/nir_intrinsics.c \
        nir/nir_intrinsics.h \
        nir/nir_live_variables.c \
        nir/nir_lower_alu_to_scalar.c \
        nir/nir_lower_atomics.c \
+       nir/nir_lower_clip.c \
        nir/nir_lower_global_vars_to_local.c \
+       nir/nir_lower_gs_intrinsics.c \
        nir/nir_lower_load_const_to_scalar.c \
        nir/nir_lower_locals_to_regs.c \
        nir/nir_lower_idiv.c \
        nir/nir_lower_io.c \
        nir/nir_lower_outputs_to_temporaries.c \
        nir/nir_lower_phis_to_scalar.c \
-       nir/nir_lower_samplers.cpp \
+       nir/nir_lower_samplers.c \
        nir/nir_lower_system_values.c \
-       nir/nir_lower_tex_projector.c \
+       nir/nir_lower_tex.c \
        nir/nir_lower_to_source_mods.c \
+       nir/nir_lower_two_sided_color.c \
        nir/nir_lower_vars_to_ssa.c \
        nir/nir_lower_var_copies.c \
        nir/nir_lower_vec_to_movs.c \
        nir/nir_metadata.c \
+       nir/nir_move_vec_src_uses_to_dest.c \
        nir/nir_normalize_cubemap_coords.c \
        nir/nir_opt_constant_folding.c \
        nir/nir_opt_copy_propagate.c \
        nir/nir_opt_cse.c \
        nir/nir_opt_dce.c \
+       nir/nir_opt_dead_cf.c \
        nir/nir_opt_gcm.c \
        nir/nir_opt_global_to_local.c \
        nir/nir_opt_peephole_ffma.c \
@@@ -65,7 -71,6 +71,7 @@@
        nir/nir_remove_dead_variables.c \
        nir/nir_search.c \
        nir/nir_search.h \
 +      nir/nir_spirv.h \
        nir/nir_split_var_copies.c \
        nir/nir_sweep.c \
        nir/nir_to_ssa.c \
        nir/nir_vla.h \
        nir/nir_worklist.c \
        nir/nir_worklist.h \
 -      nir/nir_types.cpp
 +      nir/nir_types.cpp \
 +      nir/spirv_to_nir.c \
-       nir/spirv_glsl450_to_nir.c \
-       $(NIR_GENERATED_FILES)
++      nir/spirv_glsl450_to_nir.c
  
  # libglsl
  
diff --combined src/glsl/ast_to_hir.cpp
index 81b44bd6786c8c8f427650159bf3953da9d26dfe,6899a554f2c92cb5779bfa42c4301855be6c304a..351aafc1a72aee29aa274403725ed8b5453da5db
@@@ -67,6 -67,48 +67,48 @@@ static voi
  remove_per_vertex_blocks(exec_list *instructions,
                           _mesa_glsl_parse_state *state, ir_variable_mode mode);
  
+ /**
+  * Visitor class that finds the first instance of any write-only variable that
+  * is ever read, if any
+  */
+ class read_from_write_only_variable_visitor : public ir_hierarchical_visitor
+ {
+ public:
+    read_from_write_only_variable_visitor() : found(NULL)
+    {
+    }
+    virtual ir_visitor_status visit(ir_dereference_variable *ir)
+    {
+       if (this->in_assignee)
+          return visit_continue;
+       ir_variable *var = ir->variable_referenced();
+       /* We can have image_write_only set on both images and buffer variables,
+        * but in the former there is a distinction between reads from
+        * the variable itself (write_only) and from the memory they point to
+        * (image_write_only), while in the case of buffer variables there is
+        * no such distinction, that is why this check here is limited to
+        * buffer variables alone.
+        */
+       if (!var || var->data.mode != ir_var_shader_storage)
+          return visit_continue;
+       if (var->data.image_write_only) {
+          found = var;
+          return visit_stop;
+       }
+       return visit_continue;
+    }
+    ir_variable *get_variable() {
+       return found;
+    }
+ private:
+    ir_variable *found;
+ };
  
  void
  _mesa_ast_to_hir(exec_list *instructions, struct _mesa_glsl_parse_state *state)
      */
     remove_per_vertex_blocks(instructions, state, ir_var_shader_in);
     remove_per_vertex_blocks(instructions, state, ir_var_shader_out);
+    /* Check that we don't have reads from write-only variables */
+    read_from_write_only_variable_visitor v;
+    v.run(instructions);
+    ir_variable *error_var = v.get_variable();
+    if (error_var) {
+       /* It would be nice to have proper location information, but for that
+        * we would need to check this as we process each kind of AST node
+        */
+       YYLTYPE loc;
+       memset(&loc, 0, sizeof(loc));
+       _mesa_glsl_error(&loc, state, "Read from write-only variable `%s'",
+                        error_var->name);
+    }
  }
  
  
@@@ -820,7 -876,16 +876,16 @@@ do_assignment(exec_list *instructions, 
                            "assignment to %s",
                            non_lvalue_description);
           error_emitted = true;
-       } else if (lhs_var != NULL && lhs_var->data.read_only) {
+       } else if (lhs_var != NULL && (lhs_var->data.read_only ||
+                  (lhs_var->data.mode == ir_var_shader_storage &&
+                   lhs_var->data.image_read_only))) {
+          /* We can have image_read_only set on both images and buffer variables,
+           * but in the former there is a distinction between assignments to
+           * the variable itself (read_only) and to the memory they point to
+           * (image_read_only), while in the case of buffer variables there is
+           * no such distinction, that is why this check here is limited to
+           * buffer variables alone.
+           */
           _mesa_glsl_error(&lhs_loc, state,
                            "assignment to read-only variable '%s'",
                            lhs_var->name);
@@@ -1019,7 -1084,6 +1084,7 @@@ do_comparison(void *mem_ctx, int operat
     case GLSL_TYPE_SAMPLER:
     case GLSL_TYPE_IMAGE:
     case GLSL_TYPE_INTERFACE:
 +   case GLSL_TYPE_FUNCTION:
     case GLSL_TYPE_ATOMIC_UINT:
     case GLSL_TYPE_SUBROUTINE:
        /* I assume a comparison of a struct containing a sampler just
@@@ -2115,7 -2179,7 +2180,7 @@@ validate_binding_qualifier(struct _mesa
     }
  
     const struct gl_context *const ctx = state->ctx;
-    unsigned elements = type->is_array() ? type->length : 1;
+    unsigned elements = type->is_array() ? type->arrays_of_arrays_size() : 1;
     unsigned max_index = qual->binding + elements - 1;
     const glsl_type *base_type = type->without_array();
  
@@@ -2921,11 -2985,13 +2986,13 @@@ apply_type_qualifier_to_variable(const 
         var->data.depth_layout = ir_depth_layout_none;
  
     if (qual->flags.q.std140 ||
+        qual->flags.q.std430 ||
         qual->flags.q.packed ||
         qual->flags.q.shared) {
        _mesa_glsl_error(loc, state,
-                        "uniform block layout qualifiers std140, packed, and "
-                        "shared can only be applied to uniform blocks, not "
+                        "uniform and shader storage block layout qualifiers "
+                        "std140, std430, packed, and shared can only be "
+                        "applied to uniform or shader storage blocks, not "
                         "members");
     }
  
@@@ -4579,7 -4645,7 +4646,7 @@@ ast_function::hir(exec_list *instructio
     if (state->es_shader && state->language_version >= 300) {
        /* Local shader has no exact candidates; check the built-ins. */
        _mesa_glsl_initialize_builtin_functions();
-       if (_mesa_glsl_find_builtin_function_by_name(state, name)) {
+       if (_mesa_glsl_find_builtin_function_by_name(name)) {
           YYLTYPE loc = this->get_location();
           _mesa_glsl_error(& loc, state,
                            "A shader cannot redefine or overload built-in "
@@@ -5605,10 -5671,19 +5672,19 @@@ ast_process_structure_or_interface_bloc
                                           bool is_interface,
                                           enum glsl_matrix_layout matrix_layout,
                                           bool allow_reserved_names,
-                                          ir_variable_mode var_mode)
+                                          ir_variable_mode var_mode,
+                                          ast_type_qualifier *layout)
  {
     unsigned decl_count = 0;
  
+    /* For blocks that accept memory qualifiers (i.e. shader storage), verify
+     * that we don't have incompatible qualifiers
+     */
+    if (layout && layout->flags.q.read_only && layout->flags.q.write_only) {
+       _mesa_glsl_error(&loc, state,
+                        "Interface block sets both readonly and writeonly");
+    }
     /* Make an initial pass over the list of fields to determine how
      * many there are.  Each element in this list is an ast_declarator_list.
      * This means that we actually need to count the number of elements in the
            * is_interface case, will have resulted in compilation having
            * already halted due to a syntax error.
            */
-          const struct glsl_type *field_type =
-             decl_type != NULL ? decl_type : glsl_type::error_type;
+          assert(decl_type);
  
-          if (is_interface && field_type->contains_opaque()) {
+          if (is_interface && decl_type->contains_opaque()) {
              YYLTYPE loc = decl_list->get_location();
              _mesa_glsl_error(&loc, state,
                               "uniform/buffer in non-default interface block contains "
                               "opaque variable");
           }
  
-          if (field_type->contains_atomic()) {
+          if (decl_type->contains_atomic()) {
              /* From section 4.1.7.3 of the GLSL 4.40 spec:
               *
               *    "Members of structures cannot be declared as atomic counter
                               "shader storage block or uniform block");
           }
  
-          if (field_type->contains_image()) {
+          if (decl_type->contains_image()) {
              /* FINISHME: Same problem as with atomic counters.
               * FINISHME: Request clarification from Khronos and add
               * FINISHME: spec quotation here.
           const struct ast_type_qualifier *const qual =
              & decl_list->type->qualifier;
           if (qual->flags.q.std140 ||
+              qual->flags.q.std430 ||
               qual->flags.q.packed ||
               qual->flags.q.shared) {
              _mesa_glsl_error(&loc, state,
                               "uniform/shader storage block layout qualifiers "
-                              "std140, packed, and shared can only be applied "
-                              "to uniform/shader storage blocks, not members");
+                              "std140, std430, packed, and shared can only be "
+                              "applied to uniform/shader storage blocks, not "
+                              "members");
           }
  
           if (qual->flags.q.constant) {
                               "to struct or interface block members");
           }
  
-          field_type = process_array_type(&loc, decl_type,
-                                          decl->array_specifier, state);
+          const struct glsl_type *field_type =
+             process_array_type(&loc, decl_type, decl->array_specifier, state);
           fields[i].type = field_type;
           fields[i].name = decl->identifier;
           fields[i].location = -1;
                     || fields[i].matrix_layout == GLSL_MATRIX_LAYOUT_COLUMN_MAJOR);
           }
  
+          /* Image qualifiers are allowed on buffer variables, which can only
+           * be defined inside shader storage buffer objects
+           */
+          if (layout && var_mode == ir_var_shader_storage) {
+             if (qual->flags.q.read_only && qual->flags.q.write_only) {
+                _mesa_glsl_error(&loc, state,
+                                 "buffer variable `%s' can't be "
+                                 "readonly and writeonly.", fields[i].name);
+             }
+             /* For readonly and writeonly qualifiers the field definition,
+              * if set, overwrites the layout qualifier.
+              */
+             bool read_only = layout->flags.q.read_only;
+             bool write_only = layout->flags.q.write_only;
+             if (qual->flags.q.read_only) {
+                read_only = true;
+                write_only = false;
+             } else if (qual->flags.q.write_only) {
+                read_only = false;
+                write_only = true;
+             }
+             fields[i].image_read_only = read_only;
+             fields[i].image_write_only = write_only;
+             /* For other qualifiers, we set the flag if either the layout
+              * qualifier or the field qualifier are set
+              */
+             fields[i].image_coherent = qual->flags.q.coherent ||
+                                         layout->flags.q.coherent;
+             fields[i].image_volatile = qual->flags.q._volatile ||
+                                         layout->flags.q._volatile;
+             fields[i].image_restrict = qual->flags.q.restrict_flag ||
+                                         layout->flags.q.restrict_flag;
+          }
           i++;
        }
     }
@@@ -5822,7 -5936,8 +5937,8 @@@ ast_struct_specifier::hir(exec_list *in
                                                 false,
                                                 GLSL_MATRIX_LAYOUT_INHERITED,
                                                 false /* allow_reserved_names */,
-                                                ir_var_auto);
+                                                ir_var_auto,
+                                                NULL);
  
     validate_identifier(this->name, loc, state);
  
@@@ -5881,6 -5996,19 +5997,19 @@@ private
     bool found;
  };
  
+ static bool
+ is_unsized_array_last_element(ir_variable *v)
+ {
+    const glsl_type *interface_type = v->get_interface_type();
+    int length = interface_type->length;
+    assert(v->type->is_unsized_array());
+    /* Check if it is the last element of the interface */
+    if (strcmp(interface_type->fields.structure[length-1].name, v->name) == 0)
+       return true;
+    return false;
+ }
  
  ir_rvalue *
  ast_interface_block::hir(exec_list *instructions,
                         this->block_name);
     }
  
+    if (!this->layout.flags.q.buffer &&
+        this->layout.flags.q.std430) {
+       _mesa_glsl_error(&loc, state,
+                        "std430 storage block layout qualifier is supported "
+                        "only for shader storage blocks");
+    }
     /* The ast_interface_block has a list of ast_declarator_lists.  We
      * need to turn those into ir_variables with an association
      * with this uniform block.
        packing = GLSL_INTERFACE_PACKING_SHARED;
     } else if (this->layout.flags.q.packed) {
        packing = GLSL_INTERFACE_PACKING_PACKED;
+    } else if (this->layout.flags.q.std430) {
+       packing = GLSL_INTERFACE_PACKING_STD430;
     } else {
        /* The default layout is std140.
         */
                                                 true,
                                                 matrix_layout,
                                                 redeclaring_per_vertex,
-                                                var_mode);
+                                                var_mode,
+                                                &this->layout);
  
     state->struct_specifier_depth--;
  
        else if (state->stage == MESA_SHADER_TESS_CTRL && var_mode == ir_var_shader_out)
           handle_tess_ctrl_shader_output_decl(state, loc, var);
  
+       for (unsigned i = 0; i < num_variables; i++) {
+          if (fields[i].type->is_unsized_array()) {
+             if (var_mode == ir_var_shader_storage) {
+                if (i != (num_variables - 1)) {
+                   _mesa_glsl_error(&loc, state, "unsized array `%s' definition: "
+                                    "only last member of a shader storage block "
+                                    "can be defined as unsized array",
+                                    fields[i].name);
+                }
+             } else {
+                /* From GLSL ES 3.10 spec, section 4.1.9 "Arrays":
+                *
+                * "If an array is declared as the last member of a shader storage
+                * block and the size is not specified at compile-time, it is
+                * sized at run-time. In all other cases, arrays are sized only
+                * at compile-time."
+                */
+                if (state->es_shader) {
+                   _mesa_glsl_error(&loc, state, "unsized array `%s' definition: "
+                                  "only last member of a shader storage block "
+                                  "can be defined as unsized array",
+                                  fields[i].name);
+                }
+             }
+          }
+       }
        if (ir_variable *earlier =
            state->symbols->get_variable(this->instance_name)) {
           if (!redeclaring_per_vertex) {
  
           var->data.stream = this->layout.stream;
  
+          if (var->data.mode == ir_var_shader_storage) {
+             var->data.image_read_only = fields[i].image_read_only;
+             var->data.image_write_only = fields[i].image_write_only;
+             var->data.image_coherent = fields[i].image_coherent;
+             var->data.image_volatile = fields[i].image_volatile;
+             var->data.image_restrict = fields[i].image_restrict;
+          }
           /* Examine var name here since var may get deleted in the next call */
           bool var_is_gl_id = is_gl_identifier(var->name);
  
           var->data.explicit_binding = this->layout.flags.q.explicit_binding;
           var->data.binding = this->layout.binding;
  
+          if (var->type->is_unsized_array()) {
+             if (var->is_in_shader_storage_block()) {
+                if (!is_unsized_array_last_element(var)) {
+                   _mesa_glsl_error(&loc, state, "unsized array `%s' definition: "
+                                    "only last member of a shader storage block "
+                                    "can be defined as unsized array",
+                                    var->name);
+                }
+                var->data.from_ssbo_unsized_array = true;
+             } else {
+                /* From GLSL ES 3.10 spec, section 4.1.9 "Arrays":
+                *
+                * "If an array is declared as the last member of a shader storage
+                * block and the size is not specified at compile-time, it is
+                * sized at run-time. In all other cases, arrays are sized only
+                * at compile-time."
+                */
+                if (state->es_shader) {
+                   _mesa_glsl_error(&loc, state, "unsized array `%s' definition: "
+                                  "only last member of a shader storage block "
+                                  "can be defined as unsized array",
+                                  var->name);
+                }
+             }
+          }
           state->symbols->add_variable(var);
           instructions->push_tail(var);
        }
diff --combined src/glsl/glsl_parser_extras.cpp
index 5c8f98b091d96f606e06807a584407a5912705b6,f5542415d4a4e11a02a3d28953d288f603c94156..c02d28c0866dd72d959c9b5f88d685ca4ab5d2b3
@@@ -87,8 -87,6 +87,8 @@@ _mesa_glsl_parse_state::_mesa_glsl_pars
  
     this->extensions = &ctx->Extensions;
  
 +   this->ARB_compute_shader_enable = true;
 +
     this->Const.MaxLights = ctx->Const.MaxLights;
     this->Const.MaxClipPlanes = ctx->Const.MaxClipPlanes;
     this->Const.MaxTextureUnits = ctx->Const.MaxTextureUnits;
     this->default_uniform_qualifier = new(this) ast_type_qualifier();
     this->default_uniform_qualifier->flags.q.shared = 1;
     this->default_uniform_qualifier->flags.q.column_major = 1;
+    this->default_uniform_qualifier->is_default_qualifier = true;
+    this->default_shader_storage_qualifier = new(this) ast_type_qualifier();
+    this->default_shader_storage_qualifier->flags.q.shared = 1;
+    this->default_shader_storage_qualifier->flags.q.column_major = 1;
+    this->default_shader_storage_qualifier->is_default_qualifier = true;
  
     this->fs_uses_gl_fragcoord = false;
     this->fs_redeclares_gl_fragcoord = false;
@@@ -604,8 -608,9 +610,9 @@@ static const _mesa_glsl_extension _mesa
     EXT(ARB_shader_image_size,            true,  false,     ARB_shader_image_size),
     EXT(ARB_shader_precision,             true,  false,     ARB_shader_precision),
     EXT(ARB_shader_stencil_export,        true,  false,     ARB_shader_stencil_export),
-    EXT(ARB_shader_storage_buffer_object, true,  false,     ARB_shader_storage_buffer_object),
+    EXT(ARB_shader_storage_buffer_object, true,  true,      ARB_shader_storage_buffer_object),
     EXT(ARB_shader_subroutine,            true,  false,     ARB_shader_subroutine),
+    EXT(ARB_shader_texture_image_samples, true,  false,     ARB_shader_texture_image_samples),
     EXT(ARB_shader_texture_lod,           true,  false,     ARB_shader_texture_lod),
     EXT(ARB_shading_language_420pack,     true,  false,     ARB_shading_language_420pack),
     EXT(ARB_shading_language_packing,     true,  false,     ARB_shading_language_packing),
@@@ -859,6 -864,139 +866,139 @@@ _mesa_ast_set_aggregate_type(const glsl
     }
  }
  
+ void
+ _mesa_ast_process_interface_block(YYLTYPE *locp,
+                                   _mesa_glsl_parse_state *state,
+                                   ast_interface_block *const block,
+                                   const struct ast_type_qualifier q)
+ {
+    if (q.flags.q.buffer) {
+       if (!state->has_shader_storage_buffer_objects()) {
+          _mesa_glsl_error(locp, state,
+                           "#version 430 / GL_ARB_shader_storage_buffer_object "
+                           "required for defining shader storage blocks");
+       } else if (state->ARB_shader_storage_buffer_object_warn) {
+          _mesa_glsl_warning(locp, state,
+                             "#version 430 / GL_ARB_shader_storage_buffer_object "
+                             "required for defining shader storage blocks");
+       }
+    } else if (q.flags.q.uniform) {
+       if (!state->has_uniform_buffer_objects()) {
+          _mesa_glsl_error(locp, state,
+                           "#version 140 / GL_ARB_uniform_buffer_object "
+                           "required for defining uniform blocks");
+       } else if (state->ARB_uniform_buffer_object_warn) {
+          _mesa_glsl_warning(locp, state,
+                             "#version 140 / GL_ARB_uniform_buffer_object "
+                             "required for defining uniform blocks");
+       }
+    } else {
+       if (state->es_shader || state->language_version < 150) {
+          _mesa_glsl_error(locp, state,
+                           "#version 150 required for using "
+                           "interface blocks");
+       }
+    }
+    /* From the GLSL 1.50.11 spec, section 4.3.7 ("Interface Blocks"):
+     * "It is illegal to have an input block in a vertex shader
+     *  or an output block in a fragment shader"
+     */
+    if ((state->stage == MESA_SHADER_VERTEX) && q.flags.q.in) {
+       _mesa_glsl_error(locp, state,
+                        "`in' interface block is not allowed for "
+                        "a vertex shader");
+    } else if ((state->stage == MESA_SHADER_FRAGMENT) && q.flags.q.out) {
+       _mesa_glsl_error(locp, state,
+                        "`out' interface block is not allowed for "
+                        "a fragment shader");
+    }
+    /* Since block arrays require names, and both features are added in
+     * the same language versions, we don't have to explicitly
+     * version-check both things.
+     */
+    if (block->instance_name != NULL) {
+       state->check_version(150, 300, locp, "interface blocks with "
+                            "an instance name are not allowed");
+    }
+    uint64_t interface_type_mask;
+    struct ast_type_qualifier temp_type_qualifier;
+    /* Get a bitmask containing only the in/out/uniform/buffer
+     * flags, allowing us to ignore other irrelevant flags like
+     * interpolation qualifiers.
+     */
+    temp_type_qualifier.flags.i = 0;
+    temp_type_qualifier.flags.q.uniform = true;
+    temp_type_qualifier.flags.q.in = true;
+    temp_type_qualifier.flags.q.out = true;
+    temp_type_qualifier.flags.q.buffer = true;
+    interface_type_mask = temp_type_qualifier.flags.i;
+    /* Get the block's interface qualifier.  The interface_qualifier
+     * production rule guarantees that only one bit will be set (and
+     * it will be in/out/uniform).
+     */
+    uint64_t block_interface_qualifier = q.flags.i;
+    block->layout.flags.i |= block_interface_qualifier;
+    if (state->stage == MESA_SHADER_GEOMETRY &&
+        state->has_explicit_attrib_stream()) {
+       /* Assign global layout's stream value. */
+       block->layout.flags.q.stream = 1;
+       block->layout.flags.q.explicit_stream = 0;
+       block->layout.stream = state->out_qualifier->stream;
+    }
+    foreach_list_typed (ast_declarator_list, member, link, &block->declarations) {
+       ast_type_qualifier& qualifier = member->type->qualifier;
+       if ((qualifier.flags.i & interface_type_mask) == 0) {
+          /* GLSLangSpec.1.50.11, 4.3.7 (Interface Blocks):
+           * "If no optional qualifier is used in a member declaration, the
+           *  qualifier of the variable is just in, out, or uniform as declared
+           *  by interface-qualifier."
+           */
+          qualifier.flags.i |= block_interface_qualifier;
+       } else if ((qualifier.flags.i & interface_type_mask) !=
+                  block_interface_qualifier) {
+          /* GLSLangSpec.1.50.11, 4.3.7 (Interface Blocks):
+           * "If optional qualifiers are used, they can include interpolation
+           *  and storage qualifiers and they must declare an input, output,
+           *  or uniform variable consistent with the interface qualifier of
+           *  the block."
+           */
+          _mesa_glsl_error(locp, state,
+                           "uniform/in/out qualifier on "
+                           "interface block member does not match "
+                           "the interface block");
+       }
+       /* From GLSL ES 3.0, chapter 4.3.7 "Interface Blocks":
+        *
+        * "GLSL ES 3.0 does not support interface blocks for shader inputs or
+        * outputs."
+        *
+        * And from GLSL ES 3.0, chapter 4.6.1 "The invariant qualifier":.
+        *
+        * "Only variables output from a shader can be candidates for
+        * invariance."
+        *
+        * From GLSL 4.40 and GLSL 1.50, section "Interface Blocks":
+        *
+        * "If optional qualifiers are used, they can include interpolation
+        * qualifiers, auxiliary storage qualifiers, and storage qualifiers
+        * and they must declare an input, output, or uniform member
+        * consistent with the interface qualifier of the block"
+        */
+       if (qualifier.flags.q.invariant)
+          _mesa_glsl_error(locp, state,
+                           "invariant qualifiers cannot be used "
+                           "with interface blocks members");
+    }
+ }
  
  void
  _mesa_ast_type_qualifier_print(const struct ast_type_qualifier *q)
@@@ -1695,6 -1833,8 +1835,8 @@@ _mesa_glsl_compile_shader(struct gl_con
        }
     }
  
+    _mesa_glsl_initialize_derived_variables(shader);
     delete state->symbols;
     ralloc_free(state);
  }
diff --combined src/glsl/glsl_types.cpp
index c737fb6e36639eb838b0d8116ced8e535346e3ed,0ead0f2a32716d7b341519aaccd50c36ee054b1e..15cd45ea708e4adf458536c638eec7293ffdf541
@@@ -32,7 -32,6 +32,7 @@@ mtx_t glsl_type::mutex = _MTX_INITIALIZ
  hash_table *glsl_type::array_types = NULL;
  hash_table *glsl_type::record_types = NULL;
  hash_table *glsl_type::interface_types = NULL;
 +hash_table *glsl_type::function_types = NULL;
  hash_table *glsl_type::subroutine_types = NULL;
  void *glsl_type::mem_ctx = NULL;
  
@@@ -46,8 -45,8 +46,8 @@@ glsl_type::init_ralloc_type_ctx(void
  }
  
  glsl_type::glsl_type(GLenum gl_type,
-                    glsl_base_type base_type, unsigned vector_elements,
-                    unsigned matrix_columns, const char *name) :
+                      glsl_base_type base_type, unsigned vector_elements,
+                      unsigned matrix_columns, const char *name) :
     gl_type(gl_type),
     base_type(base_type),
     sampler_dimensionality(0), sampler_shadow(0), sampler_array(0),
@@@ -70,8 -69,8 +70,8 @@@
  }
  
  glsl_type::glsl_type(GLenum gl_type, glsl_base_type base_type,
-                    enum glsl_sampler_dim dim, bool shadow, bool array,
-                    unsigned type, const char *name) :
+                      enum glsl_sampler_dim dim, bool shadow, bool array,
+                      unsigned type, const char *name) :
     gl_type(gl_type),
     base_type(base_type),
     sampler_dimensionality(dim), sampler_shadow(shadow),
@@@ -97,7 -96,7 +97,7 @@@
  }
  
  glsl_type::glsl_type(const glsl_struct_field *fields, unsigned num_fields,
-                    const char *name) :
+                      const char *name) :
     gl_type(0),
     base_type(GLSL_TYPE_STRUCT),
     sampler_dimensionality(0), sampler_shadow(0), sampler_array(0),
     assert(name != NULL);
     this->name = ralloc_strdup(this->mem_ctx, name);
     this->fields.structure = ralloc_array(this->mem_ctx,
-                                        glsl_struct_field, length);
+                                          glsl_struct_field, length);
  
     for (i = 0; i < length; i++) {
        this->fields.structure[i].type = fields[i].type;
        this->fields.structure[i].name = ralloc_strdup(this->fields.structure,
-                                                    fields[i].name);
+                                                      fields[i].name);
        this->fields.structure[i].location = fields[i].location;
        this->fields.structure[i].interpolation = fields[i].interpolation;
        this->fields.structure[i].centroid = fields[i].centroid;
        this->fields.structure[i].sample = fields[i].sample;
        this->fields.structure[i].matrix_layout = fields[i].matrix_layout;
        this->fields.structure[i].patch = fields[i].patch;
+       this->fields.structure[i].image_read_only = fields[i].image_read_only;
+       this->fields.structure[i].image_write_only = fields[i].image_write_only;
+       this->fields.structure[i].image_coherent = fields[i].image_coherent;
+       this->fields.structure[i].image_volatile = fields[i].image_volatile;
+       this->fields.structure[i].image_restrict = fields[i].image_restrict;
     }
  
     mtx_unlock(&glsl_type::mutex);
  }
  
  glsl_type::glsl_type(const glsl_struct_field *fields, unsigned num_fields,
-                    enum glsl_interface_packing packing, const char *name) :
+                      enum glsl_interface_packing packing, const char *name) :
     gl_type(0),
     base_type(GLSL_TYPE_INTERFACE),
     sampler_dimensionality(0), sampler_shadow(0), sampler_array(0),
     assert(name != NULL);
     this->name = ralloc_strdup(this->mem_ctx, name);
     this->fields.structure = ralloc_array(this->mem_ctx,
-                                        glsl_struct_field, length);
+                                          glsl_struct_field, length);
     for (i = 0; i < length; i++) {
        this->fields.structure[i].type = fields[i].type;
        this->fields.structure[i].name = ralloc_strdup(this->fields.structure,
-                                                    fields[i].name);
+                                                      fields[i].name);
        this->fields.structure[i].location = fields[i].location;
        this->fields.structure[i].interpolation = fields[i].interpolation;
        this->fields.structure[i].centroid = fields[i].centroid;
     mtx_unlock(&glsl_type::mutex);
  }
  
 +glsl_type::glsl_type(const glsl_type *return_type,
 +                     const glsl_function_param *params, unsigned num_params) :
 +   gl_type(0),
 +   base_type(GLSL_TYPE_FUNCTION),
 +   sampler_dimensionality(0), sampler_shadow(0), sampler_array(0),
 +   sampler_type(0), interface_packing(0),
 +   vector_elements(0), matrix_columns(0),
 +   length(num_params)
 +{
 +   unsigned int i;
 +
 +   mtx_lock(&glsl_type::mutex);
 +
 +   init_ralloc_type_ctx();
 +
 +   this->fields.parameters = rzalloc_array(this->mem_ctx,
 +                                           glsl_function_param, num_params + 1);
 +
 +   /* We store the return type as the first parameter */
 +   this->fields.parameters[0].type = return_type;
 +   this->fields.parameters[0].in = false;
 +   this->fields.parameters[0].out = true;
 +
 +   /* We store the i'th parameter in slot i+1 */
 +   for (i = 0; i < length; i++) {
 +      this->fields.parameters[i + 1].type = params[i].type;
 +      this->fields.parameters[i + 1].in = params[i].in;
 +      this->fields.parameters[i + 1].out = params[i].out;
 +   }
 +
 +   mtx_unlock(&glsl_type::mutex);
 +}
 +
  glsl_type::glsl_type(const char *subroutine_name) :
     gl_type(0),
     base_type(GLSL_TYPE_SUBROUTINE),
@@@ -220,8 -191,8 +225,8 @@@ glsl_type::contains_sampler() cons
        return this->fields.array->contains_sampler();
     } else if (this->is_record()) {
        for (unsigned int i = 0; i < this->length; i++) {
-        if (this->fields.structure[i].type->contains_sampler())
-           return true;
+          if (this->fields.structure[i].type->contains_sampler())
+             return true;
        }
        return false;
     } else {
@@@ -237,8 -208,8 +242,8 @@@ glsl_type::contains_integer() cons
        return this->fields.array->contains_integer();
     } else if (this->is_record()) {
        for (unsigned int i = 0; i < this->length; i++) {
-        if (this->fields.structure[i].type->contains_integer())
-           return true;
+          if (this->fields.structure[i].type->contains_integer())
+             return true;
        }
        return false;
     } else {
@@@ -253,8 -224,8 +258,8 @@@ glsl_type::contains_double() cons
        return this->fields.array->contains_double();
     } else if (this->is_record()) {
        for (unsigned int i = 0; i < this->length; i++) {
-        if (this->fields.structure[i].type->contains_double())
-           return true;
+          if (this->fields.structure[i].type->contains_double())
+             return true;
        }
        return false;
     } else {
@@@ -289,8 -260,8 +294,8 @@@ glsl_type::contains_subroutine() cons
        return this->fields.array->contains_subroutine();
     } else if (this->is_record()) {
        for (unsigned int i = 0; i < this->length; i++) {
-        if (this->fields.structure[i].type->contains_subroutine())
-           return true;
+          if (this->fields.structure[i].type->contains_subroutine())
+             return true;
        }
        return false;
     } else {
@@@ -335,8 -306,8 +340,8 @@@ glsl_type::contains_image() cons
        return this->fields.array->contains_image();
     } else if (this->is_record()) {
        for (unsigned int i = 0; i < this->length; i++) {
-        if (this->fields.structure[i].type->contains_image())
-           return true;
+          if (this->fields.structure[i].type->contains_image())
+             return true;
        }
        return false;
     } else {
@@@ -536,21 -507,21 +541,21 @@@ glsl_type::get_instance(unsigned base_t
     if (columns == 1) {
        switch (base_type) {
        case GLSL_TYPE_UINT:
-        return uvec(rows);
+          return uvec(rows);
        case GLSL_TYPE_INT:
-        return ivec(rows);
+          return ivec(rows);
        case GLSL_TYPE_FLOAT:
-        return vec(rows);
+          return vec(rows);
        case GLSL_TYPE_DOUBLE:
-        return dvec(rows);
+          return dvec(rows);
        case GLSL_TYPE_BOOL:
-        return bvec(rows);
+          return bvec(rows);
        default:
-        return error_type;
+          return error_type;
        }
     } else {
        if ((base_type != GLSL_TYPE_FLOAT && base_type != GLSL_TYPE_DOUBLE) || (rows == 1))
-        return error_type;
+          return error_type;
  
        /* GLSL matrix types are named mat{COLUMNS}x{ROWS}.  Only the following
         * combinations are valid:
@@@ -772,10 -743,10 +777,10 @@@ glsl_type::record_compare(const glsl_ty
  
     for (unsigned i = 0; i < this->length; i++) {
        if (this->fields.structure[i].type != b->fields.structure[i].type)
-        return false;
+          return false;
        if (strcmp(this->fields.structure[i].name,
-                b->fields.structure[i].name) != 0)
-        return false;
+                  b->fields.structure[i].name) != 0)
+          return false;
        if (this->fields.structure[i].matrix_layout
           != b->fields.structure[i].matrix_layout)
          return false;
        if (this->fields.structure[i].patch
            != b->fields.structure[i].patch)
           return false;
+       if (this->fields.structure[i].image_read_only
+           != b->fields.structure[i].image_read_only)
+          return false;
+       if (this->fields.structure[i].image_write_only
+           != b->fields.structure[i].image_write_only)
+          return false;
+       if (this->fields.structure[i].image_coherent
+           != b->fields.structure[i].image_coherent)
+          return false;
+       if (this->fields.structure[i].image_volatile
+           != b->fields.structure[i].image_volatile)
+          return false;
+       if (this->fields.structure[i].image_restrict
+           != b->fields.structure[i].image_restrict)
+          return false;
     }
  
     return true;
@@@ -836,8 -822,8 +856,8 @@@ glsl_type::record_key_hash(const void *
  
  const glsl_type *
  glsl_type::get_record_instance(const glsl_struct_field *fields,
-                              unsigned num_fields,
-                              const char *name)
+                                unsigned num_fields,
+                                const char *name)
  {
     const glsl_type key(fields, num_fields, name);
  
  
  const glsl_type *
  glsl_type::get_interface_instance(const glsl_struct_field *fields,
-                                 unsigned num_fields,
-                                 enum glsl_interface_packing packing,
-                                 const char *block_name)
+                                   unsigned num_fields,
+                                   enum glsl_interface_packing packing,
+                                   const char *block_name)
  {
     const glsl_type key(fields, num_fields, packing, block_name);
  
@@@ -934,74 -920,6 +954,74 @@@ glsl_type::get_subroutine_instance(cons
  }
  
  
 +static bool
 +function_key_compare(const void *a, const void *b)
 +{
 +   const glsl_type *const key1 = (glsl_type *) a;
 +   const glsl_type *const key2 = (glsl_type *) b;
 +
 +   if (key1->length != key2->length)
 +      return 1;
 +
 +   return memcmp(key1->fields.parameters, key2->fields.parameters,
 +                 (key1->length + 1) * sizeof(*key1->fields.parameters));
 +}
 +
 +
 +static uint32_t
 +function_key_hash(const void *a)
 +{
 +   const glsl_type *const key = (glsl_type *) a;
 +   char hash_key[128];
 +   unsigned size = 0;
 +
 +   size = snprintf(hash_key, sizeof(hash_key), "%08x", key->length);
 +
 +   for (unsigned i = 0; i < key->length; i++) {
 +      if (size >= sizeof(hash_key))
 +       break;
 +
 +      size += snprintf(& hash_key[size], sizeof(hash_key) - size,
 +                     "%p", (void *) key->fields.structure[i].type);
 +   }
 +
 +   return _mesa_hash_string(hash_key);
 +}
 +
 +const glsl_type *
 +glsl_type::get_function_instance(const glsl_type *return_type,
 +                                 const glsl_function_param *params,
 +                                 unsigned num_params)
 +{
 +   const glsl_type key(return_type, params, num_params);
 +
 +   mtx_lock(&glsl_type::mutex);
 +
 +   if (function_types == NULL) {
 +      function_types = _mesa_hash_table_create(NULL, function_key_hash,
 +                                               function_key_compare);
 +   }
 +
 +   struct hash_entry *entry = _mesa_hash_table_search(function_types, &key);
 +   if (entry == NULL) {
 +      mtx_unlock(&glsl_type::mutex);
 +      const glsl_type *t = new glsl_type(return_type, params, num_params);
 +      mtx_lock(&glsl_type::mutex);
 +
 +      entry = _mesa_hash_table_insert(function_types, t, (void *) t);
 +   }
 +
 +   const glsl_type *t = (const glsl_type *)entry->data;
 +
 +   assert(t->base_type == GLSL_TYPE_FUNCTION);
 +   assert(t->length == num_params);
 +
 +   mtx_unlock(&glsl_type::mutex);
 +
 +   return t;
 +}
 +
 +
  const glsl_type *
  glsl_type::get_mul_type(const glsl_type *type_a, const glsl_type *type_b)
  {
@@@ -1078,7 -996,7 +1098,7 @@@ glsl_type::field_type(const char *name
  
     for (unsigned i = 0; i < this->length; i++) {
        if (strcmp(name, this->fields.structure[i].name) == 0)
-        return this->fields.structure[i].type;
+          return this->fields.structure[i].type;
     }
  
     return error_type;
@@@ -1094,7 -1012,7 +1114,7 @@@ glsl_type::field_index(const char *name
  
     for (unsigned i = 0; i < this->length; i++) {
        if (strcmp(name, this->fields.structure[i].name) == 0)
-        return i;
+          return i;
     }
  
     return -1;
@@@ -1119,7 -1037,7 +1139,7 @@@ glsl_type::component_slots() cons
        unsigned size = 0;
  
        for (unsigned i = 0; i < this->length; i++)
-        size += this->fields.structure[i].type->component_slots();
+          size += this->fields.structure[i].type->component_slots();
  
        return size;
     }
        return 1;
     case GLSL_TYPE_SUBROUTINE:
       return 1;
 +
 +   case GLSL_TYPE_FUNCTION:
     case GLSL_TYPE_SAMPLER:
     case GLSL_TYPE_ATOMIC_UINT:
     case GLSL_TYPE_VOID:
     return 0;
  }
  
+ unsigned
+ glsl_type::record_location_offset(unsigned length) const
+ {
+    unsigned offset = 0;
+    const glsl_type *t = this->without_array();
+    if (t->is_record()) {
+       assert(length <= t->length);
+       for (unsigned i = 0; i < length; i++) {
+          const glsl_type *st = t->fields.structure[i].type;
+          const glsl_type *wa = st->without_array();
+          if (wa->is_record()) {
+             unsigned r_offset = wa->record_location_offset(wa->length);
+             offset += st->is_array() ? st->length * r_offset : r_offset;
+          } else {
+             /* We don't worry about arrays here because unless the array
+              * contains a structure or another array it only takes up a single
+              * uniform slot.
+              */
+             offset += 1;
+          }
+       }
+    }
+    return offset;
+ }
  unsigned
  glsl_type::uniform_locations() const
  {
@@@ -1231,12 -1173,12 +1277,12 @@@ glsl_type::std140_base_alignment(bool r
     if (this->is_scalar() || this->is_vector()) {
        switch (this->vector_elements) {
        case 1:
-        return N;
+          return N;
        case 2:
-        return 2 * N;
+          return 2 * N;
        case 3:
        case 4:
-        return 4 * N;
+          return 4 * N;
        }
     }
  
      */
     if (this->is_array()) {
        if (this->fields.array->is_scalar() ||
-         this->fields.array->is_vector() ||
-         this->fields.array->is_matrix()) {
-        return MAX2(this->fields.array->std140_base_alignment(row_major), 16);
+           this->fields.array->is_vector() ||
+           this->fields.array->is_matrix()) {
+          return MAX2(this->fields.array->std140_base_alignment(row_major), 16);
        } else {
-        assert(this->fields.array->is_record() ||
+          assert(this->fields.array->is_record() ||
                  this->fields.array->is_array());
-        return this->fields.array->std140_base_alignment(row_major);
+          return this->fields.array->std140_base_alignment(row_major);
        }
     }
  
        int r = this->vector_elements;
  
        if (row_major) {
-        vec_type = get_instance(base_type, c, 1);
-        array_type = glsl_type::get_array_instance(vec_type, r);
+          vec_type = get_instance(base_type, c, 1);
+          array_type = glsl_type::get_array_instance(vec_type, r);
        } else {
-        vec_type = get_instance(base_type, r, 1);
-        array_type = glsl_type::get_array_instance(vec_type, c);
+          vec_type = get_instance(base_type, r, 1);
+          array_type = glsl_type::get_array_instance(vec_type, c);
        }
  
        return array_type->std140_base_alignment(false);
              field_row_major = false;
           }
  
-        const struct glsl_type *field_type = this->fields.structure[i].type;
-        base_alignment = MAX2(base_alignment,
-                              field_type->std140_base_alignment(field_row_major));
+          const struct glsl_type *field_type = this->fields.structure[i].type;
+          base_alignment = MAX2(base_alignment,
+                                field_type->std140_base_alignment(field_row_major));
        }
        return base_alignment;
     }
@@@ -1374,25 -1316,25 +1420,25 @@@ glsl_type::std140_size(bool row_major) 
        unsigned int array_len;
  
        if (this->is_array()) {
-        element_type = this->fields.array;
-        array_len = this->length;
+          element_type = this->fields.array;
+          array_len = this->length;
        } else {
-        element_type = this;
-        array_len = 1;
+          element_type = this;
+          array_len = 1;
        }
  
        if (row_major) {
           vec_type = get_instance(element_type->base_type,
                                   element_type->matrix_columns, 1);
  
-        array_len *= element_type->vector_elements;
+          array_len *= element_type->vector_elements;
        } else {
-        vec_type = get_instance(element_type->base_type,
-                                element_type->vector_elements, 1);
-        array_len *= element_type->matrix_columns;
+          vec_type = get_instance(element_type->base_type,
+                                  element_type->vector_elements, 1);
+          array_len *= element_type->matrix_columns;
        }
        const glsl_type *array_type = glsl_type::get_array_instance(vec_type,
-                                                                 array_len);
+                                                                   array_len);
  
        return array_type->std140_size(false);
     }
      */
     if (this->is_array()) {
        if (this->fields.array->is_record()) {
-        return this->length * this->fields.array->std140_size(row_major);
+          return this->length * this->fields.array->std140_size(row_major);
        } else {
-        unsigned element_base_align =
-           this->fields.array->std140_base_alignment(row_major);
-        return this->length * MAX2(element_base_align, 16);
+          unsigned element_base_align =
+             this->fields.array->std140_base_alignment(row_major);
+          return this->length * MAX2(element_base_align, 16);
        }
     }
  
      *     rounded up to the next multiple of the base alignment of the
      *     structure.
      */
-    if (this->is_record()) {
+    if (this->is_record() || this->is_interface()) {
        unsigned size = 0;
        unsigned max_align = 0;
  
              field_row_major = false;
           }
  
-        const struct glsl_type *field_type = this->fields.structure[i].type;
-        unsigned align = field_type->std140_base_alignment(field_row_major);
-        size = glsl_align(size, align);
-        size += field_type->std140_size(field_row_major);
+          const struct glsl_type *field_type = this->fields.structure[i].type;
+          unsigned align = field_type->std140_base_alignment(field_row_major);
+          /* Ignore unsized arrays when calculating size */
+          if (field_type->is_unsized_array())
+             continue;
+          size = glsl_align(size, align);
+          size += field_type->std140_size(field_row_major);
  
           max_align = MAX2(align, max_align);
  
     return -1;
  }
  
+ unsigned
+ glsl_type::std430_base_alignment(bool row_major) const
+ {
+    unsigned N = is_double() ? 8 : 4;
+    /* (1) If the member is a scalar consuming <N> basic machine units, the
+     *     base alignment is <N>.
+     *
+     * (2) If the member is a two- or four-component vector with components
+     *     consuming <N> basic machine units, the base alignment is 2<N> or
+     *     4<N>, respectively.
+     *
+     * (3) If the member is a three-component vector with components consuming
+     *     <N> basic machine units, the base alignment is 4<N>.
+     */
+    if (this->is_scalar() || this->is_vector()) {
+       switch (this->vector_elements) {
+       case 1:
+          return N;
+       case 2:
+          return 2 * N;
+       case 3:
+       case 4:
+          return 4 * N;
+       }
+    }
+    /* OpenGL 4.30 spec, section 7.6.2.2 "Standard Uniform Block Layout":
+     *
+     * "When using the std430 storage layout, shader storage blocks will be
+     * laid out in buffer storage identically to uniform and shader storage
+     * blocks using the std140 layout, except that the base alignment and
+     * stride of arrays of scalars and vectors in rule 4 and of structures
+     * in rule 9 are not rounded up a multiple of the base alignment of a vec4.
+     */
+    if (this->is_array())
+       return this->fields.array->std430_base_alignment(row_major);
+    /* (5) If the member is a column-major matrix with <C> columns and
+     *     <R> rows, the matrix is stored identically to an array of
+     *     <C> column vectors with <R> components each, according to
+     *     rule (4).
+     *
+     * (7) If the member is a row-major matrix with <C> columns and <R>
+     *     rows, the matrix is stored identically to an array of <R>
+     *     row vectors with <C> components each, according to rule (4).
+     */
+    if (this->is_matrix()) {
+       const struct glsl_type *vec_type, *array_type;
+       int c = this->matrix_columns;
+       int r = this->vector_elements;
+       if (row_major) {
+          vec_type = get_instance(base_type, c, 1);
+          array_type = glsl_type::get_array_instance(vec_type, r);
+       } else {
+          vec_type = get_instance(base_type, r, 1);
+          array_type = glsl_type::get_array_instance(vec_type, c);
+       }
+       return array_type->std430_base_alignment(false);
+    }
+    /* (9) If the member is a structure, the base alignment of the
+     *     structure is <N>, where <N> is the largest base alignment
+     *     value of any of its members, and rounded up to the base
+     *     alignment of a vec4. The individual members of this
+     *     sub-structure are then assigned offsets by applying this set
+     *     of rules recursively, where the base offset of the first
+     *     member of the sub-structure is equal to the aligned offset
+     *     of the structure. The structure may have padding at the end;
+     *     the base offset of the member following the sub-structure is
+     *     rounded up to the next multiple of the base alignment of the
+     *     structure.
+     */
+    if (this->is_record()) {
+       unsigned base_alignment = 0;
+       for (unsigned i = 0; i < this->length; i++) {
+          bool field_row_major = row_major;
+          const enum glsl_matrix_layout matrix_layout =
+             glsl_matrix_layout(this->fields.structure[i].matrix_layout);
+          if (matrix_layout == GLSL_MATRIX_LAYOUT_ROW_MAJOR) {
+             field_row_major = true;
+          } else if (matrix_layout == GLSL_MATRIX_LAYOUT_COLUMN_MAJOR) {
+             field_row_major = false;
+          }
+          const struct glsl_type *field_type = this->fields.structure[i].type;
+          base_alignment = MAX2(base_alignment,
+                                field_type->std430_base_alignment(field_row_major));
+       }
+       return base_alignment;
+    }
+    assert(!"not reached");
+    return -1;
+ }
+ unsigned
+ glsl_type::std430_array_stride(bool row_major) const
+ {
+    unsigned N = is_double() ? 8 : 4;
+    /* Notice that the array stride of a vec3 is not 3 * N but 4 * N.
+     * See OpenGL 4.30 spec, section 7.6.2.2 "Standard Uniform Block Layout"
+     *
+     * (3) If the member is a three-component vector with components consuming
+     *     <N> basic machine units, the base alignment is 4<N>.
+     */
+    if (this->is_vector() && this->vector_elements == 3)
+       return 4 * N;
+    /* By default use std430_size(row_major) */
+    return this->std430_size(row_major);
+ }
+ unsigned
+ glsl_type::std430_size(bool row_major) const
+ {
+    unsigned N = is_double() ? 8 : 4;
+    /* OpenGL 4.30 spec, section 7.6.2.2 "Standard Uniform Block Layout":
+     *
+     * "When using the std430 storage layout, shader storage blocks will be
+     * laid out in buffer storage identically to uniform and shader storage
+     * blocks using the std140 layout, except that the base alignment and
+     * stride of arrays of scalars and vectors in rule 4 and of structures
+     * in rule 9 are not rounded up a multiple of the base alignment of a vec4.
+     */
+    if (this->is_scalar() || this->is_vector())
+       return this->vector_elements * N;
+    if (this->without_array()->is_matrix()) {
+       const struct glsl_type *element_type;
+       const struct glsl_type *vec_type;
+       unsigned int array_len;
+       if (this->is_array()) {
+          element_type = this->fields.array;
+          array_len = this->length;
+       } else {
+          element_type = this;
+          array_len = 1;
+       }
+       if (row_major) {
+          vec_type = get_instance(element_type->base_type,
+                                  element_type->matrix_columns, 1);
+          array_len *= element_type->vector_elements;
+       } else {
+          vec_type = get_instance(element_type->base_type,
+                                  element_type->vector_elements, 1);
+          array_len *= element_type->matrix_columns;
+       }
+       const glsl_type *array_type = glsl_type::get_array_instance(vec_type,
+                                                                   array_len);
+       return array_type->std430_size(false);
+    }
+    if (this->is_array()) {
+       if (this->fields.array->is_record())
+          return this->length * this->fields.array->std430_size(row_major);
+       else
+          return this->length * this->fields.array->std430_base_alignment(row_major);
+    }
+    if (this->is_record() || this->is_interface()) {
+       unsigned size = 0;
+       unsigned max_align = 0;
+       for (unsigned i = 0; i < this->length; i++) {
+          bool field_row_major = row_major;
+          const enum glsl_matrix_layout matrix_layout =
+             glsl_matrix_layout(this->fields.structure[i].matrix_layout);
+          if (matrix_layout == GLSL_MATRIX_LAYOUT_ROW_MAJOR) {
+             field_row_major = true;
+          } else if (matrix_layout == GLSL_MATRIX_LAYOUT_COLUMN_MAJOR) {
+             field_row_major = false;
+          }
+          const struct glsl_type *field_type = this->fields.structure[i].type;
+          unsigned align = field_type->std430_base_alignment(field_row_major);
+          size = glsl_align(size, align);
+          size += field_type->std430_size(field_row_major);
+          max_align = MAX2(align, max_align);
+       }
+       size = glsl_align(size, max_align);
+       return size;
+    }
+    assert(!"not reached");
+    return -1;
+ }
  
  unsigned
  glsl_type::count_attribute_slots() const
     case GLSL_TYPE_ARRAY:
        return this->length * this->fields.array->count_attribute_slots();
  
 +   case GLSL_TYPE_FUNCTION:
     case GLSL_TYPE_SAMPLER:
     case GLSL_TYPE_IMAGE:
     case GLSL_TYPE_ATOMIC_UINT:
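
The std430 helpers added above follow the std140 rules, except that rules 4 and 9 drop the round-up to a vec4's base alignment. A minimal sketch of the practical difference, assuming only the std140_size()/std430_size() methods from this change and the public glsl_type::float_type singleton (variable names are illustrative):

    /* float[4]: the std140 array stride is rounded up to 16 bytes,
     * the std430 stride is just the element's own alignment.
     */
    const glsl_type *arr =
       glsl_type::get_array_instance(glsl_type::float_type, 4);
    unsigned s140 = arr->std140_size(false);   /* 4 * 16 = 64 bytes */
    unsigned s430 = arr->std430_size(false);   /* 4 *  4 = 16 bytes */
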
diff --combined src/glsl/glsl_types.h
index 0a8a0b97dc908cb5d5701bed255a2ae5059f69f9,3ec764219de9fdb4ac7bb597a3b484aa216a2c40..b83e1ca3d2cbca3c2f65a41443171a27e700e7e7
@@@ -56,7 -56,6 +56,7 @@@ enum glsl_base_type 
     GLSL_TYPE_IMAGE,
     GLSL_TYPE_ATOMIC_UINT,
     GLSL_TYPE_STRUCT,
 +   GLSL_TYPE_FUNCTION,
     GLSL_TYPE_INTERFACE,
     GLSL_TYPE_ARRAY,
     GLSL_TYPE_VOID,
@@@ -78,7 -77,8 +78,8 @@@ enum glsl_sampler_dim 
  enum glsl_interface_packing {
     GLSL_INTERFACE_PACKING_STD140,
     GLSL_INTERFACE_PACKING_SHARED,
-    GLSL_INTERFACE_PACKING_PACKED
+    GLSL_INTERFACE_PACKING_PACKED,
+    GLSL_INTERFACE_PACKING_STD430
  };
  
  enum glsl_matrix_layout {
@@@ -180,7 -180,7 +181,7 @@@ struct glsl_type 
      */
     union {
        const struct glsl_type *array;            /**< Type of array elements. */
 -      const struct glsl_type *parameters;       /**< Parameters to function. */
 +      struct glsl_function_param *parameters;   /**< Parameters to function. */
        struct glsl_struct_field *structure;      /**< List of struct fields. */
     } fields;
  
      */
     static const glsl_type *get_subroutine_instance(const char *subroutine_name);
  
 +   /**
 +    * Get the instance of a function type
 +    */
 +   static const glsl_type *get_function_instance(const struct glsl_type *return_type,
 +                                                 const glsl_function_param *parameters,
 +                                                 unsigned num_params);
 +
     /**
      * Get the type resulting from a multiplication of \p type_a * \p type_b
      */
      */
     unsigned component_slots() const;
  
+    /**
+     * Calculate offset between the base location of the struct in
+     * uniform storage and a struct member.
+     * For the initial call, length is the index of the member to find the
+     * offset for.
+     */
+    unsigned record_location_offset(unsigned length) const;
     /**
      * Calculate the number of unique values from glGetUniformLocation for the
      * elements of the type.
      */
     unsigned std140_size(bool row_major) const;
  
+    /**
+     * Alignment in bytes of the start of this type in a std430 shader
+     * storage block.
+     */
+    unsigned std430_base_alignment(bool row_major) const;
+    /**
+     * Calculate array stride in bytes of this type in a std430 shader storage
+     * block.
+     */
+    unsigned std430_array_stride(bool row_major) const;
+    /**
+     * Size in bytes of this type in a std430 shader storage block.
+     *
+     * Note that this is not GL_BUFFER_SIZE
+     */
+    unsigned std430_size(bool row_major) const;
     /**
      * \brief Can this type be implicitly converted to another?
      *
        return t;
     }
  
+    /**
+     * Return the total number of elements in an array including the elements
+     * in arrays of arrays.
+     */
+    unsigned arrays_of_arrays_size() const
+    {
+       if (!is_array())
+          return 0;
+       unsigned size = length;
+       const glsl_type *base_type = fields.array;
+       while (base_type->is_array()) {
+          size = size * base_type->length;
+          base_type = base_type->fields.array;
+       }
+       return size;
+    }
     /**
      * Return the amount of atomic counter storage required for a type.
      */
@@@ -697,10 -736,6 +744,10 @@@ private
     glsl_type(const glsl_struct_field *fields, unsigned num_fields,
             enum glsl_interface_packing packing, const char *name);
  
 +   /** Constructor for function types */
 +   glsl_type(const glsl_type *return_type,
 +             const glsl_function_param *params, unsigned num_params);
 +
     /** Constructor for array types */
     glsl_type(const glsl_type *array, unsigned length);
  
     /** Hash table containing the known subroutine types. */
     static struct hash_table *subroutine_types;
  
 +   /** Hash table containing the known function types. */
 +   static struct hash_table *function_types;
 +
     static bool record_key_compare(const void *a, const void *b);
     static unsigned record_key_hash(const void *key);
  
     /*@}*/
  };
  
 +#undef DECL_TYPE
 +#undef STRUCT_TYPE
 +#endif /* __cplusplus */
 +
  struct glsl_struct_field {
     const struct glsl_type *type;
     const char *name;
      */
     int stream;
  
 -
+    /**
+     * Image qualifiers, applicable to buffer variables defined in shader
+     * storage buffer objects (SSBOs)
+     */
+    unsigned image_read_only:1;
+    unsigned image_write_only:1;
+    unsigned image_coherent:1;
+    unsigned image_volatile:1;
+    unsigned image_restrict:1;
 +#ifdef __cplusplus
     glsl_struct_field(const struct glsl_type *_type, const char *_name)
        : type(_type), name(_name), location(-1), interpolation(0), centroid(0),
          sample(0), matrix_layout(GLSL_MATRIX_LAYOUT_INHERITED), patch(0),
     {
        /* empty */
     }
 +#endif
 +};
 +
 +struct glsl_function_param {
 +   const struct glsl_type *type;
 +
 +   bool in;
 +   bool out;
  };
  
  static inline unsigned int
@@@ -830,4 -860,8 +887,4 @@@ glsl_align(unsigned int a, unsigned in
     return (a + align - 1) / align * align;
  }
  
 -#undef DECL_TYPE
 -#undef STRUCT_TYPE
 -#endif /* __cplusplus */
 -
  #endif /* GLSL_TYPES_H */
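
The new GLSL_TYPE_FUNCTION machinery above is driven by glsl_function_param together with get_function_instance(). A hypothetical usage sketch, assuming only the declarations added in this header (the parameter values are illustrative):

    /* Build a type for something like "void f(in float a, out int b)". */
    glsl_function_param params[2];
    params[0].type = glsl_type::float_type;
    params[0].in = true;
    params[0].out = false;
    params[1].type = glsl_type::int_type;
    params[1].in = false;
    params[1].out = true;
    const glsl_type *fn_type =
       glsl_type::get_function_instance(glsl_type::void_type, params, 2);

Repeated requests with the same return type and parameter list return the same cached glsl_type, mirroring how record and interface instances are deduplicated.
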
diff --combined src/glsl/ir_clone.cpp
index 4edf70dba5d96ea0fb617a12eb8f37bb453c4765,a2cd672d5d61878a6216213119a5f73b553de393..d6b06eeec87e2d2939e0460d0a717e04eeb5be28
@@@ -222,6 -222,7 +222,7 @@@ ir_texture::clone(void *mem_ctx, struc
     case ir_tex:
     case ir_lod:
     case ir_query_levels:
+    case ir_texture_samples:
        break;
     case ir_txb:
        new_tex->lod_info.bias = this->lod_info.bias->clone(mem_ctx, ht);
@@@ -363,7 -364,6 +364,7 @@@ ir_constant::clone(void *mem_ctx, struc
        return c;
     }
  
 +   case GLSL_TYPE_FUNCTION:
     case GLSL_TYPE_SAMPLER:
     case GLSL_TYPE_IMAGE:
     case GLSL_TYPE_ATOMIC_UINT:
index f238513f17451b09ad97d4ae6c8fbaf474435e71,34830829b4a63de3a7fba63856b5f1973ed93aae..b0a4ec3f2fbfa93060a3a6d13828af6b47483251
@@@ -48,7 -48,7 +48,7 @@@ static unsigne
  get_uniform_block_index(const gl_shader_program *shProg,
                          const char *uniformBlockName)
  {
-    for (unsigned i = 0; i < shProg->NumUniformBlocks; i++) {
+    for (unsigned i = 0; i < shProg->NumBufferInterfaceBlocks; i++) {
        if (!strcmp(shProg->UniformBlocks[i].Name, uniformBlockName))
         return i;
     }
@@@ -88,7 -88,6 +88,7 @@@ copy_constant_to_storage(union gl_const
        case GLSL_TYPE_IMAGE:
        case GLSL_TYPE_ATOMIC_UINT:
        case GLSL_TYPE_INTERFACE:
 +      case GLSL_TYPE_FUNCTION:
        case GLSL_TYPE_VOID:
        case GLSL_TYPE_SUBROUTINE:
        case GLSL_TYPE_ERROR:
index e3597e57e7331c5dfda9ca4d440217598bc56657,f03a107a9015ca59fb9c9223c83df31236b3f916..5ee6ff1d854ec35a6eb212ded4117f4ca52795cd
@@@ -44,7 -44,7 +44,7 @@@ namespace 
  class nir_visitor : public ir_visitor
  {
  public:
 -   nir_visitor(nir_shader *shader);
 +   nir_visitor(nir_shader *shader, gl_shader *sh);
     ~nir_visitor();
  
     virtual void visit(ir_variable *);
@@@ -84,8 -84,6 +84,8 @@@ private
  
     bool supports_ints;
  
 +   struct gl_shader *sh;
 +
     nir_shader *shader;
     nir_function_impl *impl;
     exec_list *cf_node_list;
@@@ -135,19 -133,23 +135,24 @@@ glsl_to_nir(struct gl_shader *sh, cons
  {
     nir_shader *shader = nir_shader_create(NULL, sh->Stage, options);
  
 -   nir_visitor v1(shader);
 +   nir_visitor v1(shader, sh);
     nir_function_visitor v2(&v1);
     v2.run(sh->ir);
     visit_exec_list(sh->ir, &v1);
  
+    nir_lower_outputs_to_temporaries(shader);
+    shader->gs.vertices_out = sh->Geom.VerticesOut;
+    shader->gs.invocations = sh->Geom.Invocations;
     return shader;
  }
  
 -nir_visitor::nir_visitor(nir_shader *shader)
 +nir_visitor::nir_visitor(nir_shader *shader, gl_shader *sh)
  {
     this->supports_ints = shader->options->native_integers;
     this->shader = shader;
 +   this->sh = sh;
     this->is_global = true;
     this->var_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
                                               _mesa_key_pointer_equal);
@@@ -327,7 -329,6 +332,7 @@@ nir_visitor::visit(ir_variable *ir
     }
  
     var->data.index = ir->data.index;
 +   var->data.descriptor_set = 0;
     var->data.binding = ir->data.binding;
     /* XXX Get rid of buffer_index */
     var->data.atomic.buffer_index = ir->data.binding;
@@@ -646,11 -647,34 +651,34 @@@ nir_visitor::visit(ir_call *ir
           op = nir_intrinsic_memory_barrier;
        } else if (strcmp(ir->callee_name(), "__intrinsic_image_size") == 0) {
           op = nir_intrinsic_image_size;
+       } else if (strcmp(ir->callee_name(), "__intrinsic_image_samples") == 0) {
+          op = nir_intrinsic_image_samples;
+       } else if (strcmp(ir->callee_name(), "__intrinsic_store_ssbo") == 0) {
+          op = nir_intrinsic_store_ssbo;
+       } else if (strcmp(ir->callee_name(), "__intrinsic_load_ssbo") == 0) {
+          op = nir_intrinsic_load_ssbo;
+       } else if (strcmp(ir->callee_name(), "__intrinsic_ssbo_atomic_add_internal") == 0) {
+          op = nir_intrinsic_ssbo_atomic_add;
+       } else if (strcmp(ir->callee_name(), "__intrinsic_ssbo_atomic_and_internal") == 0) {
+          op = nir_intrinsic_ssbo_atomic_and;
+       } else if (strcmp(ir->callee_name(), "__intrinsic_ssbo_atomic_or_internal") == 0) {
+          op = nir_intrinsic_ssbo_atomic_or;
+       } else if (strcmp(ir->callee_name(), "__intrinsic_ssbo_atomic_xor_internal") == 0) {
+          op = nir_intrinsic_ssbo_atomic_xor;
+       } else if (strcmp(ir->callee_name(), "__intrinsic_ssbo_atomic_min_internal") == 0) {
+          op = nir_intrinsic_ssbo_atomic_min;
+       } else if (strcmp(ir->callee_name(), "__intrinsic_ssbo_atomic_max_internal") == 0) {
+          op = nir_intrinsic_ssbo_atomic_max;
+       } else if (strcmp(ir->callee_name(), "__intrinsic_ssbo_atomic_exchange_internal") == 0) {
+          op = nir_intrinsic_ssbo_atomic_exchange;
+       } else if (strcmp(ir->callee_name(), "__intrinsic_ssbo_atomic_comp_swap_internal") == 0) {
+          op = nir_intrinsic_ssbo_atomic_comp_swap;
        } else {
           unreachable("not reached");
        }
  
        nir_intrinsic_instr *instr = nir_intrinsic_instr_create(shader, op);
+       nir_dest *dest = &instr->dest;
  
        switch (op) {
        case nir_intrinsic_atomic_counter_read_var:
              (ir_dereference *) ir->actual_parameters.get_head();
           instr->variables[0] = evaluate_deref(&instr->instr, param);
           nir_ssa_dest_init(&instr->instr, &instr->dest, 1, NULL);
+          nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr);
           break;
        }
        case nir_intrinsic_image_load:
        case nir_intrinsic_image_atomic_xor:
        case nir_intrinsic_image_atomic_exchange:
        case nir_intrinsic_image_atomic_comp_swap:
+       case nir_intrinsic_image_samples:
        case nir_intrinsic_image_size: {
           nir_ssa_undef_instr *instr_undef =
              nir_ssa_undef_instr_create(shader, 1);
                                info->dest_components, NULL);
           }
  
-          if (op == nir_intrinsic_image_size)
+          if (op == nir_intrinsic_image_size ||
+              op == nir_intrinsic_image_samples) {
+             nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr);
              break;
+          }
  
           /* Set the address argument, extending the coordinate vector to four
            * components.
              instr->src[3] = evaluate_rvalue((ir_dereference *)param);
              param = param->get_next();
           }
+          nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr);
           break;
        }
        case nir_intrinsic_memory_barrier:
+          nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr);
           break;
+       case nir_intrinsic_store_ssbo: {
+          exec_node *param = ir->actual_parameters.get_head();
+          ir_rvalue *block = ((ir_instruction *)param)->as_rvalue();
+          param = param->get_next();
+          ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue();
+          param = param->get_next();
+          ir_rvalue *val = ((ir_instruction *)param)->as_rvalue();
+          param = param->get_next();
+          ir_constant *write_mask = ((ir_instruction *)param)->as_constant();
+          assert(write_mask);
+          /* Check if we need the indirect version */
+          ir_constant *const_offset = offset->as_constant();
+          if (!const_offset) {
+             op = nir_intrinsic_store_ssbo_indirect;
+             ralloc_free(instr);
+             instr = nir_intrinsic_instr_create(shader, op);
+             instr->src[2] = evaluate_rvalue(offset);
+             instr->const_index[0] = 0;
+             dest = &instr->dest;
+          } else {
+             instr->const_index[0] = const_offset->value.u[0];
+          }
+          instr->const_index[1] = write_mask->value.u[0];
+          instr->src[0] = evaluate_rvalue(val);
+          instr->num_components = val->type->vector_elements;
+          instr->src[1] = evaluate_rvalue(block);
+          nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr);
+          break;
+       }
+       case nir_intrinsic_load_ssbo: {
+          exec_node *param = ir->actual_parameters.get_head();
+          ir_rvalue *block = ((ir_instruction *)param)->as_rvalue();
+          param = param->get_next();
+          ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue();
+          /* Check if we need the indirect version */
+          ir_constant *const_offset = offset->as_constant();
+          if (!const_offset) {
+             op = nir_intrinsic_load_ssbo_indirect;
+             ralloc_free(instr);
+             instr = nir_intrinsic_instr_create(shader, op);
+             instr->src[1] = evaluate_rvalue(offset);
+             instr->const_index[0] = 0;
+             dest = &instr->dest;
+          } else {
+             instr->const_index[0] = const_offset->value.u[0];
+          }
+          instr->src[0] = evaluate_rvalue(block);
+          const glsl_type *type = ir->return_deref->var->type;
+          instr->num_components = type->vector_elements;
+          /* Setup destination register */
+          nir_ssa_dest_init(&instr->instr, &instr->dest,
+                            type->vector_elements, NULL);
+          /* Insert the created nir instruction now since in the case of boolean
+           * result we will need to emit another instruction after it
+           */
+          nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr);
+          /*
+           * In SSBOs and UBOs, a true boolean value is any non-zero value, but we
+           * consider a true boolean to be ~0. Fix this up with a != 0
+           * comparison.
+           */
+          if (type->base_type == GLSL_TYPE_BOOL) {
+             nir_load_const_instr *const_zero =
+                nir_load_const_instr_create(shader, 1);
+             const_zero->value.u[0] = 0;
+             nir_instr_insert_after_cf_list(this->cf_node_list,
+                                            &const_zero->instr);
+             nir_alu_instr *load_ssbo_compare =
+                nir_alu_instr_create(shader, nir_op_ine);
+             load_ssbo_compare->src[0].src.is_ssa = true;
+             load_ssbo_compare->src[0].src.ssa = &instr->dest.ssa;
+             load_ssbo_compare->src[1].src.is_ssa = true;
+             load_ssbo_compare->src[1].src.ssa = &const_zero->def;
+             for (unsigned i = 0; i < type->vector_elements; i++)
+                load_ssbo_compare->src[1].swizzle[i] = 0;
+             nir_ssa_dest_init(&load_ssbo_compare->instr,
+                               &load_ssbo_compare->dest.dest,
+                               type->vector_elements, NULL);
+             load_ssbo_compare->dest.write_mask = (1 << type->vector_elements) - 1;
+             nir_instr_insert_after_cf_list(this->cf_node_list,
+                                            &load_ssbo_compare->instr);
+             dest = &load_ssbo_compare->dest.dest;
+          }
+          break;
+       }
+       case nir_intrinsic_ssbo_atomic_add:
+       case nir_intrinsic_ssbo_atomic_min:
+       case nir_intrinsic_ssbo_atomic_max:
+       case nir_intrinsic_ssbo_atomic_and:
+       case nir_intrinsic_ssbo_atomic_or:
+       case nir_intrinsic_ssbo_atomic_xor:
+       case nir_intrinsic_ssbo_atomic_exchange:
+       case nir_intrinsic_ssbo_atomic_comp_swap: {
+          int param_count = ir->actual_parameters.length();
+          assert(param_count == 3 || param_count == 4);
+          /* Block index */
+          exec_node *param = ir->actual_parameters.get_head();
+          ir_instruction *inst = (ir_instruction *) param;
+          instr->src[0] = evaluate_rvalue(inst->as_rvalue());
+          /* Offset */
+          param = param->get_next();
+          inst = (ir_instruction *) param;
+          instr->src[1] = evaluate_rvalue(inst->as_rvalue());
+          /* data1 parameter (this is always present) */
+          param = param->get_next();
+          inst = (ir_instruction *) param;
+          instr->src[2] = evaluate_rvalue(inst->as_rvalue());
+          /* data2 parameter (only with atomic_comp_swap) */
+          if (param_count == 4) {
+             assert(op == nir_intrinsic_ssbo_atomic_comp_swap);
+             param = param->get_next();
+             inst = (ir_instruction *) param;
+             instr->src[3] = evaluate_rvalue(inst->as_rvalue());
+          }
+          /* Atomic result */
+          assert(ir->return_deref);
+          nir_ssa_dest_init(&instr->instr, &instr->dest,
+                            ir->return_deref->type->vector_elements, NULL);
+          nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr);
+          break;
+       }
        default:
           unreachable("not reached");
        }
  
-       nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr);
        if (ir->return_deref) {
           nir_intrinsic_instr *store_instr =
              nir_intrinsic_instr_create(shader, nir_intrinsic_store_var);
  
           store_instr->variables[0] =
              evaluate_deref(&store_instr->instr, ir->return_deref);
-          store_instr->src[0] = nir_src_for_ssa(&instr->dest.ssa);
+          store_instr->src[0] = nir_src_for_ssa(&dest->ssa);
  
           nir_instr_insert_after_cf_list(this->cf_node_list,
                                          &store_instr->instr);
@@@ -922,7 -1092,8 +1096,8 @@@ nir_visitor::add_instr(nir_instr *instr
  {
     nir_dest *dest = get_instr_dest(instr);
  
-    nir_ssa_dest_init(instr, dest, num_components, NULL);
+    if (dest)
+       nir_ssa_dest_init(instr, dest, num_components, NULL);
  
     nir_instr_insert_after_cf_list(this->cf_node_list, instr);
     this->result = instr;
@@@ -1002,11 -1173,9 +1177,10 @@@ nir_visitor::visit(ir_expression *ir
        } else {
           op = nir_intrinsic_load_ubo_indirect;
        }
 +
        nir_intrinsic_instr *load = nir_intrinsic_instr_create(this->shader, op);
        load->num_components = ir->type->vector_elements;
        load->const_index[0] = const_index ? const_index->value.u[0] : 0; /* base offset */
-       load->const_index[1] = 1; /* number of vec4's */
        load->src[0] = evaluate_rvalue(ir->operands[0]);
        if (!const_index)
           load->src[1] = evaluate_rvalue(ir->operands[1]);
           unreachable("not reached");
        }
        break;
+    case ir_unop_get_buffer_size: {
+       nir_intrinsic_instr *load = nir_intrinsic_instr_create(
+          this->shader,
+          nir_intrinsic_get_buffer_size);
+       load->num_components = ir->type->vector_elements;
+       load->src[0] = evaluate_rvalue(ir->operands[0]);
+       add_instr(&load->instr, ir->type->vector_elements);
+       return;
+    }
     case ir_binop_add:
     case ir_binop_sub:
     case ir_binop_mul:
@@@ -1722,6 -1901,11 +1906,11 @@@ nir_visitor::visit(ir_texture *ir
        num_srcs = 0;
        break;
  
+    case ir_texture_samples:
+       op = nir_texop_texture_samples;
+       num_srcs = 0;
+       break;
     default:
        unreachable("not reached");
     }
diff --combined src/glsl/nir/nir.h
index 7b188fa8ed19354284792aca1cea1bb9ad16e059,d0c7b04a49f655d464a24d9fceb65e02d5a87cb4..19a4a361a25d287e9e1f69a5fe834d35aa4ce616
@@@ -278,6 -278,7 +278,7 @@@ typedef struct 
         *   - Fragment shader output: one of the values from \c gl_frag_result.
         *   - Uniforms: Per-stage uniform slot number for default uniform block.
         *   - Uniforms: Index within the uniform block definition for UBO members.
+        *   - Non-UBO Uniforms: uniform slot number.
         *   - Other: This field is not currently used.
         *
         * If the variable is a uniform, shader input, or shader output, and the
         */
        int index;
  
 +      /**
 +       * Descriptor set binding for sampler or UBO.
 +       */
 +      int descriptor_set;
 +
        /**
         * Initial binding point for a sampler or UBO.
         *
@@@ -422,6 -418,9 +423,9 @@@ typedef struct nir_instr 
     nir_instr_type type;
     struct nir_block *block;
  
+    /** generic instruction index. */
+    unsigned index;
     /* A temporary for optimization and analysis passes to use for storing
      * flags.  For instance, DCE uses this to store the "dead/live" info.
      */
@@@ -520,11 -519,7 +524,11 @@@ typedef struct nir_src 
     bool is_ssa;
  } nir_src;
  
 -#define NIR_SRC_INIT (nir_src) { { NULL } }
 +#ifdef __cplusplus
 +#  define NIR_SRC_INIT nir_src()
 +#else
 +#  define NIR_SRC_INIT (nir_src) { { NULL } }
 +#endif
  
  #define nir_foreach_use(reg_or_ssa_def, src) \
     list_for_each_entry(nir_src, src, &(reg_or_ssa_def)->uses, use_link)
@@@ -547,11 -542,7 +551,11 @@@ typedef struct 
     bool is_ssa;
  } nir_dest;
  
 -#define NIR_DEST_INIT (nir_dest) { { { NULL } } }
 +#ifdef __cplusplus
 +#  define NIR_DEST_INIT nir_dest()
 +#else
 +#  define NIR_DEST_INIT (nir_dest) { { { NULL } } }
 +#endif
  
  #define nir_foreach_def(reg, dest) \
     list_for_each_entry(nir_dest, dest, &(reg)->defs, reg.def_link)
@@@ -593,8 -584,8 +597,8 @@@ nir_dest_for_reg(nir_register *reg
     return dest;
  }
  
- void nir_src_copy(nir_src *dest, const nir_src *src, void *mem_ctx);
- void nir_dest_copy(nir_dest *dest, const nir_dest *src, void *mem_ctx);
+ void nir_src_copy(nir_src *dest, const nir_src *src, void *instr_or_if);
+ void nir_dest_copy(nir_dest *dest, const nir_dest *src, nir_instr *instr);
  
  typedef struct {
     nir_src src;
@@@ -643,10 -634,6 +647,6 @@@ typedef struct 
     unsigned write_mask : 4; /* ignored if dest.is_ssa is true */
  } nir_alu_dest;
  
- void nir_alu_src_copy(nir_alu_src *dest, const nir_alu_src *src, void *mem_ctx);
- void nir_alu_dest_copy(nir_alu_dest *dest, const nir_alu_dest *src,
-                        void *mem_ctx);
  typedef enum {
     nir_type_invalid = 0, /* Not a valid type */
     nir_type_float,
@@@ -715,6 -702,11 +715,11 @@@ typedef struct nir_alu_instr 
     nir_alu_src src[];
  } nir_alu_instr;
  
+ void nir_alu_src_copy(nir_alu_src *dest, const nir_alu_src *src,
+                       nir_alu_instr *instr);
+ void nir_alu_dest_copy(nir_alu_dest *dest, const nir_alu_dest *src,
+                        nir_alu_instr *instr);
  /* is this source channel used? */
  static inline bool
  nir_alu_instr_channel_used(nir_alu_instr *instr, unsigned src, unsigned channel)
@@@ -790,15 -782,6 +795,15 @@@ NIR_DEFINE_CAST(nir_deref_as_var, nir_d
  NIR_DEFINE_CAST(nir_deref_as_array, nir_deref, nir_deref_array, deref)
  NIR_DEFINE_CAST(nir_deref_as_struct, nir_deref, nir_deref_struct, deref)
  
 +/** Returns the tail of a deref chain */
 +static inline nir_deref *
 +nir_deref_tail(nir_deref *deref)
 +{
 +   while (deref->child)
 +      deref = deref->child;
 +   return deref;
 +}
 +
  typedef struct {
     nir_instr instr;
  
@@@ -966,7 -949,8 +971,8 @@@ typedef enum 
     nir_texop_txs,                /**< Texture size */
     nir_texop_lod,                /**< Texture lod query */
     nir_texop_tg4,                /**< Texture gather */
-    nir_texop_query_levels       /**< Texture levels query */
+    nir_texop_query_levels,       /**< Texture levels query */
+    nir_texop_texture_samples,    /**< Texture samples query */
  } nir_texop;
  
  typedef struct {
     /* gather component selector */
     unsigned component : 2;
  
 +   /* The descriptor set containing this texture */
 +   unsigned sampler_set;
 +
     /** The sampler index
      *
      * If this texture instruction has a nir_tex_src_sampler_offset source,
@@@ -1041,6 -1022,7 +1047,7 @@@ nir_tex_instr_dest_size(nir_tex_instr *
     case nir_texop_lod:
        return 2;
  
+    case nir_texop_texture_samples:
     case nir_texop_query_levels:
        return 1;
  
@@@ -1444,7 -1426,6 +1451,7 @@@ typedef struct nir_function 
  
  typedef struct nir_shader_compiler_options {
     bool lower_ffma;
 +   bool lower_fdiv;
     bool lower_flrp;
     bool lower_fpow;
     bool lower_fsat;
     /* lower {slt,sge,seq,sne} to {flt,fge,feq,fne} + b2f: */
     bool lower_scmp;
  
+    /* Does the native fdot instruction replicate its result for four
+     * components?  If so, then opt_algebraic_late will turn all fdotN
+     * instructions into fdot_replicatedN instructions.
+     */
+    bool fdot_replicates;
+    /** lowers ffract to fsub+ffloor: */
+    bool lower_ffract;
     /**
      * Does the driver support real 32-bit integers?  (Otherwise, integers
      * are simulated by floats.)
  } nir_shader_compiler_options;
  
  typedef struct nir_shader {
-    /** hash table of name -> uniform nir_variable */
+    /** list of uniforms (nir_variable) */
     struct exec_list uniforms;
  
-    /** hash table of name -> input nir_variable */
+    /** list of inputs (nir_variable) */
     struct exec_list inputs;
  
-    /** hash table of name -> output nir_variable */
+    /** list of outputs (nir_variable) */
     struct exec_list outputs;
  
     /** Set of driver-specific options for the shader.
      */
     const struct nir_shader_compiler_options *options;
  
-    /** list of global variables in the shader */
+    /** list of global variables in the shader (nir_variable) */
     struct exec_list globals;
  
-    /** list of system value variables in the shader */
+    /** list of system value variables in the shader (nir_variable) */
     struct exec_list system_values;
  
     struct exec_list functions; /** < list of nir_function */
  
     /** The shader stage, such as MESA_SHADER_VERTEX. */
     gl_shader_stage stage;
+    struct {
+       /** The maximum number of vertices the geometry shader might write. */
+       unsigned vertices_out;
+       /** 1 .. MAX_GEOMETRY_SHADER_INVOCATIONS */
+       unsigned invocations;
+    } gs;
  } nir_shader;
  
  #define nir_foreach_overload(shader, overload)                        \
@@@ -1597,17 -1595,6 +1621,17 @@@ typedef struct 
     };
  } nir_cursor;
  
 +static inline nir_block *
 +nir_cursor_current_block(nir_cursor cursor)
 +{
 +   if (cursor.option == nir_cursor_before_instr ||
 +       cursor.option == nir_cursor_after_instr) {
 +      return cursor.instr->block;
 +   } else {
 +      return cursor.block;
 +   }
 +}
 +
  static inline nir_cursor
  nir_before_block(nir_block *block)
  {
@@@ -1761,12 -1748,14 +1785,14 @@@ bool nir_srcs_equal(nir_src src1, nir_s
  void nir_instr_rewrite_src(nir_instr *instr, nir_src *src, nir_src new_src);
  void nir_instr_move_src(nir_instr *dest_instr, nir_src *dest, nir_src *src);
  void nir_if_rewrite_condition(nir_if *if_stmt, nir_src new_src);
+ void nir_instr_rewrite_dest(nir_instr *instr, nir_dest *dest,
+                             nir_dest new_dest);
  
  void nir_ssa_dest_init(nir_instr *instr, nir_dest *dest,
                         unsigned num_components, const char *name);
  void nir_ssa_def_init(nir_instr *instr, nir_ssa_def *def,
                        unsigned num_components, const char *name);
- void nir_ssa_def_rewrite_uses(nir_ssa_def *def, nir_src new_src, void *mem_ctx);
+ void nir_ssa_def_rewrite_uses(nir_ssa_def *def, nir_src new_src);
  
  /* visits basic blocks in source-code order */
  typedef bool (*nir_foreach_block_cb)(nir_block *block, void *state);
@@@ -1774,15 -1763,20 +1800,20 @@@ bool nir_foreach_block(nir_function_imp
                         void *state);
  bool nir_foreach_block_reverse(nir_function_impl *impl, nir_foreach_block_cb cb,
                                 void *state);
+ bool nir_foreach_block_in_cf_node(nir_cf_node *node, nir_foreach_block_cb cb,
+                                   void *state);
  
  /* If the following CF node is an if, this function returns that if.
   * Otherwise, it returns NULL.
   */
  nir_if *nir_block_get_following_if(nir_block *block);
  
+ nir_loop *nir_block_get_following_loop(nir_block *block);
  void nir_index_local_regs(nir_function_impl *impl);
  void nir_index_global_regs(nir_shader *shader);
  void nir_index_ssa_defs(nir_function_impl *impl);
+ unsigned nir_index_instrs(nir_function_impl *impl);
  
  void nir_index_blocks(nir_function_impl *impl);
  
@@@ -1810,17 -1804,19 +1841,21 @@@ void nir_dump_dom_frontier(nir_shader *
  void nir_dump_cfg_impl(nir_function_impl *impl, FILE *fp);
  void nir_dump_cfg(nir_shader *shader, FILE *fp);
  
- void nir_split_var_copies(nir_shader *shader);
+ int nir_gs_count_vertices(nir_shader *shader);
+ bool nir_split_var_copies(nir_shader *shader);
  
  void nir_lower_var_copy_instr(nir_intrinsic_instr *copy, void *mem_ctx);
  void nir_lower_var_copies(nir_shader *shader);
  
- void nir_lower_global_vars_to_local(nir_shader *shader);
+ bool nir_lower_global_vars_to_local(nir_shader *shader);
  
- void nir_lower_locals_to_regs(nir_shader *shader);
+ bool nir_lower_locals_to_regs(nir_shader *shader);
+ void nir_lower_outputs_to_temporaries(nir_shader *shader);
  
 +void nir_lower_outputs_to_temporaries(nir_shader *shader);
 +
  void nir_assign_var_locations(struct exec_list *var_list,
                                unsigned *size,
                                int (*type_size)(const struct glsl_type *));
@@@ -1829,9 -1825,10 +1864,10 @@@ void nir_lower_io(nir_shader *shader
                    int (*type_size)(const struct glsl_type *));
  void nir_lower_vars_to_ssa(nir_shader *shader);
  
- void nir_remove_dead_variables(nir_shader *shader);
+ bool nir_remove_dead_variables(nir_shader *shader);
  
- void nir_lower_vec_to_movs(nir_shader *shader);
+ void nir_move_vec_src_uses_to_dest(nir_shader *shader);
+ bool nir_lower_vec_to_movs(nir_shader *shader);
  void nir_lower_alu_to_scalar(nir_shader *shader);
  void nir_lower_load_const_to_scalar(nir_shader *shader);
  
@@@ -1839,16 -1836,58 +1875,59 @@@ void nir_lower_phis_to_scalar(nir_shade
  
  void nir_lower_samplers(nir_shader *shader,
                          const struct gl_shader_program *shader_program);
 +void nir_lower_samplers_for_vk(nir_shader *shader);
  
- void nir_lower_system_values(nir_shader *shader);
- void nir_lower_tex_projector(nir_shader *shader);
+ bool nir_lower_system_values(nir_shader *shader);
+ typedef struct nir_lower_tex_options {
+    /**
+     * bitmask of (1 << GLSL_SAMPLER_DIM_x) to control for which
+     * sampler types a texture projector is lowered.
+     */
+    unsigned lower_txp;
+    /**
+     * If true, lower rect textures to 2D, using txs to fetch the
+     * texture dimensions and dividing the texture coords by the
+     * texture dims to normalize.
+     */
+    bool lower_rect;
+    /**
+     * To emulate certain texture wrap modes, this can be used
+     * to saturate the specified tex coord to [0.0, 1.0].  The
+     * bits are according to sampler #, ie. if, for example:
+     *
+     * bits are according to sampler #, i.e. if, for example:
+     *
+     * is true, then the s coord for sampler n is saturated.
+     *
+     * Note that clamping must happen *after* projector lowering
+     * so any projected texture sample instruction with a clamped
+     * coordinate gets automatically lowered, regardless of the
+     * 'lower_txp' setting.
+     */
+    unsigned saturate_s;
+    unsigned saturate_t;
+    unsigned saturate_r;
+ } nir_lower_tex_options;
+ void nir_lower_tex(nir_shader *shader,
+                    const nir_lower_tex_options *options);
  void nir_lower_idiv(nir_shader *shader);
  
+ void nir_lower_clip_vs(nir_shader *shader, unsigned ucp_enables);
+ void nir_lower_clip_fs(nir_shader *shader, unsigned ucp_enables);
+ void nir_lower_two_sided_color(nir_shader *shader);
  void nir_lower_atomics(nir_shader *shader);
  void nir_lower_to_source_mods(nir_shader *shader);
  
- void nir_normalize_cubemap_coords(nir_shader *shader);
+ bool nir_lower_gs_intrinsics(nir_shader *shader);
+ bool nir_normalize_cubemap_coords(nir_shader *shader);
  
  void nir_live_variables_impl(nir_function_impl *impl);
  bool nir_ssa_defs_interfere(nir_ssa_def *a, nir_ssa_def *b);
@@@ -1876,6 -1915,8 +1955,8 @@@ bool nir_opt_cse(nir_shader *shader)
  bool nir_opt_dce_impl(nir_function_impl *impl);
  bool nir_opt_dce(nir_shader *shader);
  
+ bool nir_opt_dead_cf(nir_shader *shader);
  void nir_opt_gcm(nir_shader *shader);
  
  bool nir_opt_peephole_select(nir_shader *shader);
@@@ -1887,6 -1928,9 +1968,9 @@@ bool nir_opt_undef(nir_shader *shader)
  
  void nir_sweep(nir_shader *shader);
  
+ nir_intrinsic_op nir_intrinsic_from_system_value(gl_system_value val);
+ gl_system_value nir_system_value_from_intrinsic(nir_intrinsic_op intrin);
  #ifdef __cplusplus
  } /* extern "C" */
  #endif
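
Several of the declarations above are new helpers for pass writers. For example, nir_index_instrs() presumably numbers the instructions of an impl via the new nir_instr::index field, so passes can compare instruction order cheaply. A hedged sketch of how a pass might call it, assuming a nir_shader *shader is already in hand:

    /* Assign a running index to every instruction in each implemented
     * overload before running an analysis that needs ordering.
     */
    nir_foreach_overload(shader, overload) {
       if (overload->impl)
          nir_index_instrs(overload->impl);
    }
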
index 295a209b4e653972009a3d457a66a739f22153ae,624329d0a8a6fe244e2af18eb190853f48dd351a..bf45d0373c08a441d43207b76d7359ae60caf01e
@@@ -76,21 -76,36 +76,36 @@@ nir_build_imm(nir_builder *build, unsig
  static inline nir_ssa_def *
  nir_imm_float(nir_builder *build, float x)
  {
-    nir_const_value v = { { .f = {x, 0, 0, 0} } };
+    nir_const_value v;
+    memset(&v, 0, sizeof(v));
+    v.f[0] = x;
     return nir_build_imm(build, 1, v);
  }
  
  static inline nir_ssa_def *
  nir_imm_vec4(nir_builder *build, float x, float y, float z, float w)
  {
-    nir_const_value v = { { .f = {x, y, z, w} } };
+    nir_const_value v;
+    memset(&v, 0, sizeof(v));
+    v.f[0] = x;
+    v.f[1] = y;
+    v.f[2] = z;
+    v.f[3] = w;
     return nir_build_imm(build, 4, v);
  }
  
  static inline nir_ssa_def *
  nir_imm_int(nir_builder *build, int x)
  {
-    nir_const_value v = { { .i = {x, 0, 0, 0} } };
+    nir_const_value v;
+    memset(&v, 0, sizeof(v));
+    v.i[0] = x;
     return nir_build_imm(build, 1, v);
  }
  
@@@ -173,6 -188,24 +188,24 @@@ nir_##op(nir_builder *build, nir_ssa_de
  
  #include "nir_builder_opcodes.h"
  
+ static inline nir_ssa_def *
+ nir_vec(nir_builder *build, nir_ssa_def **comp, unsigned num_components)
+ {
+    switch (num_components) {
+    case 4:
+       return nir_vec4(build, comp[0], comp[1], comp[2], comp[3]);
+    case 3:
+       return nir_vec3(build, comp[0], comp[1], comp[2]);
+    case 2:
+       return nir_vec2(build, comp[0], comp[1]);
+    case 1:
+       return comp[0];
+    default:
+       unreachable("bad component count");
+       return NULL;
+    }
+ }
  /**
   * Similar to nir_fmov, but takes a nir_alu_src instead of a nir_ssa_def.
   */
@@@ -216,23 -249,13 +249,30 @@@ nir_swizzle(nir_builder *build, nir_ssa
                       nir_imov_alu(build, alu_src, num_components);
  }
  
 +/* Selects the right fdot given the number of components in each source. */
 +static inline nir_ssa_def *
 +nir_fdot(nir_builder *build, nir_ssa_def *src0, nir_ssa_def *src1)
 +{
 +   assert(src0->num_components == src1->num_components);
 +   switch (src0->num_components) {
 +   case 1: return nir_fmul(build, src0, src1);
 +   case 2: return nir_fdot2(build, src0, src1);
 +   case 3: return nir_fdot3(build, src0, src1);
 +   case 4: return nir_fdot4(build, src0, src1);
 +   default:
 +      unreachable("bad component size");
 +   }
 +
 +   return NULL;
 +}
 +
+ static inline nir_ssa_def *
+ nir_channel(nir_builder *b, nir_ssa_def *def, unsigned c)
+ {
+    unsigned swizzle[4] = {c, c, c, c};
+    return nir_swizzle(b, def, swizzle, 1, false);
+ }
  /**
   * Turns a nir_src into a nir_ssa_def * so it can be passed to
   * nir_build_alu()-based builder calls.
@@@ -251,4 -274,31 +291,31 @@@ nir_ssa_for_src(nir_builder *build, nir
     return nir_imov_alu(build, alu, num_components);
  }
  
+ static inline nir_ssa_def *
+ nir_load_var(nir_builder *build, nir_variable *var)
+ {
+    const unsigned num_components = glsl_get_vector_elements(var->type);
+    nir_intrinsic_instr *load =
+       nir_intrinsic_instr_create(build->shader, nir_intrinsic_load_var);
+    load->num_components = num_components;
+    load->variables[0] = nir_deref_var_create(load, var);
+    nir_ssa_dest_init(&load->instr, &load->dest, num_components, NULL);
+    nir_builder_instr_insert(build, &load->instr);
+    return &load->dest.ssa;
+ }
+ static inline void
+ nir_store_var(nir_builder *build, nir_variable *var, nir_ssa_def *value)
+ {
+    const unsigned num_components = glsl_get_vector_elements(var->type);
+    nir_intrinsic_instr *store =
+       nir_intrinsic_instr_create(build->shader, nir_intrinsic_store_var);
+    store->num_components = num_components;
+    store->variables[0] = nir_deref_var_create(store, var);
+    store->src[0] = nir_src_for_ssa(value);
+    nir_builder_instr_insert(build, &store->instr);
+ }
  #endif /* NIR_BUILDER_H */
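
The nir_builder additions above (nir_load_var, nir_store_var, nir_channel, nir_vec and the nir_fdot dispatcher) shorten small lowering passes. A hypothetical snippet, assuming an already-initialized nir_builder b plus a vec4 nir_variable *src and a scalar float nir_variable *result:

    nir_ssa_def *v = nir_load_var(&b, src);   /* vec4 load_var            */
    nir_ssa_def *d = nir_fdot(&b, v, v);      /* dispatches to nir_fdot4  */
    nir_store_var(&b, result, d);             /* scalar store_var         */
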
index 1f24f9f677dbed015373256b495e95fa9b97767d,649312fec51e6652065c3540a4d1ac7e0f7ae449..e02779e157bbbe6002857fd645e00102392850b7
@@@ -61,6 -61,13 +61,13 @@@ INTRINSIC(interp_var_at_sample, 1, ARR(
  INTRINSIC(interp_var_at_offset, 1, ARR(2), true, 0, 1, 0,
            NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
  
+ /*
+  * Ask the driver for the size of a given buffer. It takes the buffer index
+  * as source.
+  */
+ INTRINSIC(get_buffer_size, 1, ARR(1), true, 1, 0, 0,
+           NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
  /*
   * a barrier is an intrinsic with no inputs/outputs but which can't be moved
   * around/optimized in general
@@@ -79,9 -86,30 +86,30 @@@ BARRIER(memory_barrier
  /** A conditional discard, with a single boolean source. */
  INTRINSIC(discard_if, 1, ARR(1), false, 0, 0, 0, 0)
  
+ /**
+  * Basic Geometry Shader intrinsics.
+  *
+  * emit_vertex implements GLSL's EmitStreamVertex() built-in.  It takes a single
+  * index, which is the stream ID to write to.
+  *
+  * end_primitive implements GLSL's EndPrimitive() built-in.
+  */
  INTRINSIC(emit_vertex,   0, ARR(), false, 0, 0, 1, 0)
  INTRINSIC(end_primitive, 0, ARR(), false, 0, 0, 1, 0)
  
+ /**
+  * Geometry Shader intrinsics with a vertex count.
+  *
+  * Alternatively, drivers may implement these intrinsics, and use
+  * nir_lower_gs_intrinsics() to convert from the basic intrinsics.
+  *
+  * These maintain a count of the number of vertices emitted, as an additional
+  * unsigned integer source.
+  */
+ INTRINSIC(emit_vertex_with_counter, 1, ARR(1), false, 0, 0, 1, 0)
+ INTRINSIC(end_primitive_with_counter, 1, ARR(1), false, 0, 0, 1, 0)
+ INTRINSIC(set_vertex_count, 1, ARR(1), false, 0, 0, 0, 0)
  /*
   * Atomic counters
   *
@@@ -125,29 -153,60 +153,61 @@@ INTRINSIC(image_atomic_exchange, 3, ARR
  INTRINSIC(image_atomic_comp_swap, 4, ARR(4, 1, 1, 1), true, 1, 1, 0, 0)
  INTRINSIC(image_size, 0, ARR(), true, 4, 1, 0,
            NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+ INTRINSIC(image_samples, 0, ARR(), true, 1, 1, 0,
+           NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
  
- #define SYSTEM_VALUE(name, components) \
-    INTRINSIC(load_##name, 0, ARR(), true, components, 0, 0, \
+ /*
+  * SSBO atomic intrinsics
+  *
+  * All of the SSBO atomic memory operations read a value from memory,
+  * compute a new value using one of the operations below, write the new
+  * value to memory, and return the original value read.
+  *
+  * All operations take 3 sources except CompSwap that takes 4. These
+  * sources represent:
+  *
+  * 0: The SSBO buffer index.
+  * 1: The offset into the SSBO buffer of the variable that the atomic
+  *    operation will operate on.
+  * 2: The data parameter to the atomic function (i.e. the value to add
+  *    in ssbo_atomic_add, etc).
+  * 3: For CompSwap only: the second data parameter.
+  */
+ INTRINSIC(ssbo_atomic_add, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
+ INTRINSIC(ssbo_atomic_min, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
+ INTRINSIC(ssbo_atomic_max, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
+ INTRINSIC(ssbo_atomic_and, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
+ INTRINSIC(ssbo_atomic_or, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
+ INTRINSIC(ssbo_atomic_xor, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
+ INTRINSIC(ssbo_atomic_exchange, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
+ INTRINSIC(ssbo_atomic_comp_swap, 4, ARR(1, 1, 1, 1), true, 1, 0, 0, 0)
+ #define SYSTEM_VALUE(name, components, num_indices) \
+    INTRINSIC(load_##name, 0, ARR(), true, components, 0, num_indices, \
     NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
  
- SYSTEM_VALUE(front_face, 1)
- SYSTEM_VALUE(vertex_id, 1)
- SYSTEM_VALUE(vertex_id_zero_base, 1)
- SYSTEM_VALUE(base_vertex, 1)
- SYSTEM_VALUE(instance_id, 1)
- SYSTEM_VALUE(sample_id, 1)
- SYSTEM_VALUE(sample_pos, 2)
- SYSTEM_VALUE(sample_mask_in, 1)
- SYSTEM_VALUE(invocation_id, 1)
+ SYSTEM_VALUE(front_face, 1, 0)
+ SYSTEM_VALUE(vertex_id, 1, 0)
+ SYSTEM_VALUE(vertex_id_zero_base, 1, 0)
+ SYSTEM_VALUE(base_vertex, 1, 0)
+ SYSTEM_VALUE(instance_id, 1, 0)
+ SYSTEM_VALUE(sample_id, 1, 0)
+ SYSTEM_VALUE(sample_pos, 2, 0)
+ SYSTEM_VALUE(sample_mask_in, 1, 0)
+ SYSTEM_VALUE(invocation_id, 1, 0)
+ SYSTEM_VALUE(local_invocation_id, 3, 0)
+ SYSTEM_VALUE(work_group_id, 3, 0)
+ SYSTEM_VALUE(user_clip_plane, 4, 1) /* const_index[0] is user_clip_plane[idx] */
+ SYSTEM_VALUE(num_work_groups, 3, 0)
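For reference, expanding the SYSTEM_VALUE() macro by hand shows the two shapes used above:

   SYSTEM_VALUE(sample_pos, 2, 0) expands to
      INTRINSIC(load_sample_pos, 0, ARR(), true, 2, 0, 0,
                NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
   i.e. a load with no sources, a 2-component destination and no const indices,
   while SYSTEM_VALUE(user_clip_plane, 4, 1) expands the same way but with a
   4-component destination and one const index (the clip-plane slot noted above).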
  
  /*
   * The format of the indices depends on the type of the load.  For uniforms,
   * the first index is the base address and the second index is an offset that
   * should be added to the base address.  (This way you can determine in the
   * back-end which variable is being accessed even in an array.)  For inputs,
 - * the one and only index corresponds to the attribute slot.  UBO loads also
 - * have a single index which is the base address to load from.
 + * the one and only index corresponds to the attribute slot.  UBO loads
 + * have two indices: the first is the descriptor set and the second is
 + * the base address to load from.
   *
   * UBO loads have a (possibly constant) source which is the UBO buffer index.
   * For each type of load, the _indirect variant has one additional source
               true, 0, 0, indices, flags)
  
  LOAD(uniform, 0, 2, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
 -LOAD(ubo, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
 +LOAD(ubo, 1, 2, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
  LOAD(input, 0, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
- /* LOAD(ssbo, 1, 0) */
+ LOAD(ssbo, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE)
  
  /*
   * Stores work the same way as loads, except now the first register input is
   * the value or array to store and the optional second input is the indirect
-  * offset.
+  * offset. SSBO stores are similar, but they accept an extra source for the
+  * block index and an extra index with the writemask to use.
   */
  
- #define STORE(name, num_indices, flags) \
-    INTRINSIC(store_##name, 1, ARR(0), false, 0, 0, num_indices, flags) \
-    INTRINSIC(store_##name##_indirect, 2, ARR(0, 1), false, 0, 0, \
-              num_indices, flags) \
+ #define STORE(name, extra_srcs, extra_srcs_size, extra_indices, flags) \
+    INTRINSIC(store_##name, 1 + extra_srcs, \
+              ARR(0, extra_srcs_size, extra_srcs_size, extra_srcs_size), \
+              false, 0, 0, 1 + extra_indices, flags) \
+    INTRINSIC(store_##name##_indirect, 2 + extra_srcs, \
+              ARR(0, 1, extra_srcs_size, extra_srcs_size), \
+              false, 0, 0, 1 + extra_indices, flags)
  
- STORE(output, 1, 0)
- /* STORE(ssbo, 2, 0) */
+ STORE(output, 0, 0, 0, 0)
+ STORE(ssbo, 1, 1, 1, 0)
  
- LAST_INTRINSIC(store_output_indirect)
+ LAST_INTRINSIC(store_ssbo_indirect)
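Expanding the reworked STORE() macro by hand shows what the two uses declare:

   STORE(output, 0, 0, 0, 0) expands to
      INTRINSIC(store_output, 1, ARR(0, 0, 0, 0), false, 0, 0, 1, 0)
      INTRINSIC(store_output_indirect, 2, ARR(0, 1, 0, 0), false, 0, 0, 1, 0)
   and STORE(ssbo, 1, 1, 1, 0) expands to
      INTRINSIC(store_ssbo, 2, ARR(0, 1, 1, 1), false, 0, 0, 2, 0)
      INTRINSIC(store_ssbo_indirect, 3, ARR(0, 1, 1, 1), false, 0, 0, 2, 0)
   so an SSBO store carries the stored value plus the extra block-index source
   (plus the indirect offset in the _indirect variant) and a second const index
   holding the writemask described in the comment above.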
index 0000000000000000000000000000000000000000,58ea0db4e0f12c85ffdd62d435c1973f71d4a47d..33cd9c8b0cfc5b62ac66ce7fe21fc8a60da9bde1
mode 000000,100644..100644
--- /dev/null
@@@ -1,0 -1,188 +1,258 @@@
+ /*
+  * Copyright (C) 2005-2007  Brian Paul   All Rights Reserved.
+  * Copyright (C) 2008  VMware, Inc.   All Rights Reserved.
+  * Copyright © 2014 Intel Corporation
+  *
+  * Permission is hereby granted, free of charge, to any person obtaining a
+  * copy of this software and associated documentation files (the "Software"),
+  * to deal in the Software without restriction, including without limitation
+  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+  * and/or sell copies of the Software, and to permit persons to whom the
+  * Software is furnished to do so, subject to the following conditions:
+  *
+  * The above copyright notice and this permission notice (including the next
+  * paragraph) shall be included in all copies or substantial portions of the
+  * Software.
+  *
+  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+  * DEALINGS IN THE SOFTWARE.
+  */
+ #include "nir.h"
+ #include "nir_builder.h"
+ #include "../program.h"
+ #include "program/hash_table.h"
+ #include "ir_uniform.h"
+ #include "main/compiler.h"
+ #include "main/mtypes.h"
+ #include "program/prog_parameter.h"
+ #include "program/program.h"
++static void
++add_indirect_to_tex(nir_tex_instr *instr, nir_src indirect)
++{
++   /* First, we have to resize the array of texture sources */
++   nir_tex_src *new_srcs = rzalloc_array(instr, nir_tex_src,
++                                         instr->num_srcs + 1);
++
++   for (unsigned i = 0; i < instr->num_srcs; i++) {
++      new_srcs[i].src_type = instr->src[i].src_type;
++      nir_instr_move_src(&instr->instr, &new_srcs[i].src, &instr->src[i].src);
++   }
++
++   ralloc_free(instr->src);
++   instr->src = new_srcs;
++
++   /* Now we can go ahead and move the source over to being a
++    * first-class texture source.
++    */
++   instr->src[instr->num_srcs].src_type = nir_tex_src_sampler_offset;
++   instr->num_srcs++;
++   nir_instr_rewrite_src(&instr->instr, &instr->src[instr->num_srcs - 1].src,
++                         indirect);
++}
++
+ /* Calculate the sampler index based on array indices and also
+  * calculate the base uniform location for struct members.
+  */
+ static void
+ calc_sampler_offsets(nir_deref *tail, nir_tex_instr *instr,
+                      unsigned *array_elements, nir_ssa_def **indirect,
+                      nir_builder *b, unsigned *location)
+ {
+    if (tail->child == NULL)
+       return;
+    switch (tail->child->deref_type) {
+    case nir_deref_type_array: {
+       nir_deref_array *deref_array = nir_deref_as_array(tail->child);
+       assert(deref_array->deref_array_type != nir_deref_array_type_wildcard);
+       calc_sampler_offsets(tail->child, instr, array_elements,
+                            indirect, b, location);
+       instr->sampler_index += deref_array->base_offset * *array_elements;
+       if (deref_array->deref_array_type == nir_deref_array_type_indirect) {
+          nir_ssa_def *mul =
+             nir_imul(b, nir_imm_int(b, *array_elements),
+                      nir_ssa_for_src(b, deref_array->indirect, 1));
+          nir_instr_rewrite_src(&instr->instr, &deref_array->indirect,
+                                NIR_SRC_INIT);
+          if (*indirect) {
+             *indirect = nir_iadd(b, *indirect, mul);
+          } else {
+             *indirect = mul;
+          }
+       }
+       *array_elements *= glsl_get_length(tail->type);
+        break;
+    }
+    case nir_deref_type_struct: {
+       nir_deref_struct *deref_struct = nir_deref_as_struct(tail->child);
+       *location += glsl_get_record_location_offset(tail->type, deref_struct->index);
+       calc_sampler_offsets(tail->child, instr, array_elements,
+                            indirect, b, location);
+       break;
+    }
+    default:
+       unreachable("Invalid deref type");
+       break;
+    }
+ }
+ static void
+ lower_sampler(nir_tex_instr *instr, const struct gl_shader_program *shader_program,
+               gl_shader_stage stage, nir_builder *builder)
+ {
+    if (instr->sampler == NULL)
+       return;
+    instr->sampler_index = 0;
+    unsigned location = instr->sampler->var->data.location;
+    unsigned array_elements = 1;
+    nir_ssa_def *indirect = NULL;
+    builder->cursor = nir_before_instr(&instr->instr);
+    calc_sampler_offsets(&instr->sampler->deref, instr, &array_elements,
+                         &indirect, builder, &location);
+    if (indirect) {
+       /* First, we have to resize the array of texture sources */
+       nir_tex_src *new_srcs = rzalloc_array(instr, nir_tex_src,
+                                             instr->num_srcs + 1);
+       for (unsigned i = 0; i < instr->num_srcs; i++) {
+          new_srcs[i].src_type = instr->src[i].src_type;
+          nir_instr_move_src(&instr->instr, &new_srcs[i].src,
+                             &instr->src[i].src);
+       }
+       ralloc_free(instr->src);
+       instr->src = new_srcs;
+       /* Now we can go ahead and move the source over to being a
+        * first-class texture source.
+        */
+       instr->src[instr->num_srcs].src_type = nir_tex_src_sampler_offset;
+       instr->num_srcs++;
+       nir_instr_rewrite_src(&instr->instr,
+                             &instr->src[instr->num_srcs - 1].src,
+                             nir_src_for_ssa(indirect));
+       instr->sampler_array_size = array_elements;
+    }
+    if (location > shader_program->NumUniformStorage - 1 ||
+        !shader_program->UniformStorage[location].sampler[stage].active) {
+       assert(!"cannot return a sampler");
+       return;
+    }
+    instr->sampler_index +=
+       shader_program->UniformStorage[location].sampler[stage].index;
+    instr->sampler = NULL;
+ }
+ typedef struct {
+    nir_builder builder;
+    const struct gl_shader_program *shader_program;
+    gl_shader_stage stage;
+ } lower_state;
+ static bool
+ lower_block_cb(nir_block *block, void *_state)
+ {
+    lower_state *state = (lower_state *) _state;
+    nir_foreach_instr(block, instr) {
+       if (instr->type == nir_instr_type_tex) {
+          nir_tex_instr *tex_instr = nir_instr_as_tex(instr);
+          lower_sampler(tex_instr, state->shader_program, state->stage,
+                        &state->builder);
+       }
+    }
+    return true;
+ }
+ static void
+ lower_impl(nir_function_impl *impl, const struct gl_shader_program *shader_program,
+            gl_shader_stage stage)
+ {
+    lower_state state;
+    nir_builder_init(&state.builder, impl);
+    state.shader_program = shader_program;
+    state.stage = stage;
+    nir_foreach_block(impl, lower_block_cb, &state);
+ }
+ void
+ nir_lower_samplers(nir_shader *shader,
+                    const struct gl_shader_program *shader_program)
+ {
+    nir_foreach_overload(shader, overload) {
+       if (overload->impl)
+          lower_impl(overload->impl, shader_program, shader->stage);
+    }
+ }
++
++static bool
++lower_samplers_for_vk_block(nir_block *block, void *data)
++{
++   nir_foreach_instr(block, instr) {
++      if (instr->type != nir_instr_type_tex)
++         continue;
++
++      nir_tex_instr *tex = nir_instr_as_tex(instr);
++
++      assert(tex->sampler);
++
++      tex->sampler_set = tex->sampler->var->data.descriptor_set;
++      tex->sampler_index = tex->sampler->var->data.binding;
++
++      if (tex->sampler->deref.child) {
++         assert(tex->sampler->deref.child->deref_type == nir_deref_type_array);
++         nir_deref_array *arr = nir_deref_as_array(tex->sampler->deref.child);
++
++         /* Only one-level arrays are allowed in Vulkan */
++         assert(arr->deref.child == NULL);
++
++         tex->sampler_index += arr->base_offset;
++         if (arr->deref_array_type == nir_deref_array_type_indirect) {
++            add_indirect_to_tex(tex, arr->indirect);
++            nir_instr_rewrite_src(instr, &arr->indirect, NIR_SRC_INIT);
++
++            tex->sampler_array_size = glsl_get_length(tex->sampler->deref.type);
++         }
++      }
++
++      tex->sampler = NULL;
++   }
++
++   return true;
++}
++
++void
++nir_lower_samplers_for_vk(nir_shader *shader)
++{
++   nir_foreach_overload(shader, overload) {
++      if (overload->impl) {
++         nir_foreach_block(overload->impl, lower_samplers_for_vk_block, NULL);
++      }
++   }
++}
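To make the Vulkan path concrete, here is a hedged walk-through of what lower_samplers_for_vk_block() does for a hypothetical declaration; the GLSL layout below is an assumption used only for illustration:

/* Assume: layout(set = 1, binding = 3) uniform sampler2D tex[4];
 * sampled as tex[i], where i is not a compile-time constant.
 *
 *   tex->sampler_set   = 1  (from var->data.descriptor_set)
 *   tex->sampler_index = 3 + arr->base_offset  (binding plus the constant
 *                                               part of the array index)
 *   The non-constant part of i becomes an extra nir_tex_src_sampler_offset
 *   source via add_indirect_to_tex(), sampler_array_size is set to 4 so the
 *   backend knows the array length, and tex->sampler is cleared because the
 *   deref has been fully lowered.
 */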
index 880408bc3678c5b0bb4a704b7dd6ca57f16e0e2f,585e5e0ae98cd585861000154a91c64c80a40716..cafbd6d66a531edb47eaf88128cfbc4f8f9b5820
@@@ -66,7 -66,6 +66,7 @@@ optimizations = 
     (('imul', a, 1), a),
     (('fmul', a, -1.0), ('fneg', a)),
     (('imul', a, -1), ('ineg', a)),
 +   (('fdiv', a, b), ('fmul', a, ('frcp', b)), 'options->lower_fdiv'),
     (('ffma', 0.0, a, b), b),
     (('ffma', a, 0.0, b), b),
     (('ffma', a, b, 0.0), ('fmul', a, b)),
@@@ -77,6 -76,7 +77,7 @@@
     (('flrp', a, a, b), a),
     (('flrp', 0.0, a, b), ('fmul', a, b)),
     (('flrp', a, b, c), ('fadd', ('fmul', c, ('fsub', b, a)), a), 'options->lower_flrp'),
+    (('ffract', a), ('fsub', a, ('ffloor', a)), 'options->lower_ffract'),
     (('fadd', ('fmul', a, ('fadd', 1.0, ('fneg', c))), ('fmul', b, c)), ('flrp', a, b, c), '!options->lower_flrp'),
     (('fadd', a, ('fmul', c, ('fadd', b, ('fneg', a)))), ('flrp', a, b, c), '!options->lower_flrp'),
     (('ffma', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma'),
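For reference, the identities behind the new and neighbouring rules are simple algebra: fdiv(a, b) = a * rcp(b), ffract(a) = a - floor(a), and flrp(a, b, c) = a*(1 - c) + b*c = a + c*(b - a); the last form is why the two fadd patterns above are recognized as flrp when the backend keeps flrp, and why lower_flrp expands it back into fadd/fmul/fsub.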
@@@ -241,6 -241,10 +242,10 @@@ late_optimizations = 
     (('fge', ('fadd', a, b), 0.0), ('fge', a, ('fneg', b))),
     (('feq', ('fadd', a, b), 0.0), ('feq', a, ('fneg', b))),
     (('fne', ('fadd', a, b), 0.0), ('fne', a, ('fneg', b))),
+    (('fdot2', a, b), ('fdot_replicated2', a, b), 'options->fdot_replicates'),
+    (('fdot3', a, b), ('fdot_replicated3', a, b), 'options->fdot_replicates'),
+    (('fdot4', a, b), ('fdot_replicated4', a, b), 'options->fdot_replicates'),
+    (('fdph', a, b), ('fdph_replicated', a, b), 'options->fdot_replicates'),
  ]
  
  print nir_algebraic.AlgebraicPass("nir_opt_algebraic", optimizations).render()
index 5c163b59819a182b71f49c58a55ffa4adc17e9f7,d2ea58a8b7c6d9f781d41739d769134c5a72b2e5..f583178caf426dfeb305faf3fbcecedf35085cb8
  struct split_var_copies_state {
     void *mem_ctx;
     void *dead_ctx;
+    bool progress;
  };
  
 -static nir_deref *
 -get_deref_tail(nir_deref *deref)
 -{
 -   while (deref->child != NULL)
 -      deref = deref->child;
 -   return deref;
 -}
 -
  /* Recursively constructs deref chains to split a copy instruction into
   * multiple (if needed) copy instructions with full-length deref chains.
   * External callers of this function should pass the tail and head of the
@@@ -190,6 -199,7 +191,7 @@@ split_var_copy_instr(nir_intrinsic_inst
            * remove the old one later.
            */
           nir_instr_insert_after(&old_copy->instr, &new_copy->instr);
+          state->progress = true;
        }
        break;
  
@@@ -217,8 -227,8 +219,8 @@@ split_var_copies_block(nir_block *block
  
        nir_deref *dest_head = &intrinsic->variables[0]->deref;
        nir_deref *src_head = &intrinsic->variables[1]->deref;
 -      nir_deref *dest_tail = get_deref_tail(dest_head);
 -      nir_deref *src_tail = get_deref_tail(src_head);
 +      nir_deref *dest_tail = nir_deref_tail(dest_head);
 +      nir_deref *src_tail = nir_deref_tail(src_head);
  
        switch (glsl_get_base_type(src_tail->type)) {
        case GLSL_TYPE_ARRAY:
     return true;
  }
  
- static void
+ static bool
  split_var_copies_impl(nir_function_impl *impl)
  {
     struct split_var_copies_state state;
  
     state.mem_ctx = ralloc_parent(impl);
     state.dead_ctx = ralloc_context(NULL);
+    state.progress = false;
  
     nir_foreach_block(impl, split_var_copies_block, &state);
  
     ralloc_free(state.dead_ctx);
+    return state.progress;
  }
  
- void
+ bool
  nir_split_var_copies(nir_shader *shader)
  {
+    bool progress = false;
     nir_foreach_overload(shader, overload) {
        if (overload->impl)
-          split_var_copies_impl(overload->impl);
+          progress = split_var_copies_impl(overload->impl) || progress;
     }
+    return progress;
  }
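Since the pass now reports whether it changed anything, callers can fold it into the usual NIR progress loop; a minimal sketch, with the surrounding loop and any other passes assumed:

   bool progress;
   do {
      progress = false;
      progress |= nir_split_var_copies(shader);
      /* ... other progress-reporting passes ... */
   } while (progress);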
index 69cfac18587d461b866f63403305a21c81f5b8e3,da9807f0e629b167af6ebae07c35b3a291b879fe..01f0e9b5abcc6a17bdd7fd47423a8349609f1175
@@@ -70,18 -70,6 +70,18 @@@ glsl_get_struct_field(const glsl_type *
     return type->fields.structure[index].type;
  }
  
 +const glsl_type *
 +glsl_get_function_return_type(const glsl_type *type)
 +{
 +   return type->fields.parameters[0].type;
 +}
 +
 +const glsl_function_param *
 +glsl_get_function_param(const glsl_type *type, unsigned index)
 +{
 +   return &type->fields.parameters[index + 1];
 +}
 +
  const struct glsl_type *
  glsl_get_column_type(const struct glsl_type *type)
  {
@@@ -124,20 -112,13 +124,27 @@@ glsl_get_struct_elem_name(const struct 
     return type->fields.structure[index].name;
  }
  
 +glsl_sampler_dim
 +glsl_get_sampler_dim(const struct glsl_type *type)
 +{
 +   assert(glsl_type_is_sampler(type));
 +   return (glsl_sampler_dim)type->sampler_dimensionality;
 +}
 +
 +glsl_base_type
 +glsl_get_sampler_result_type(const struct glsl_type *type)
 +{
 +   assert(glsl_type_is_sampler(type));
 +   return (glsl_base_type)type->sampler_type;
 +}
 +
+ unsigned
+ glsl_get_record_location_offset(const struct glsl_type *type,
+                                 unsigned length)
+ {
+    return type->record_location_offset(length);
+ }
  bool
  glsl_type_is_void(const glsl_type *type)
  {
@@@ -156,50 -137,12 +163,50 @@@ glsl_type_is_scalar(const struct glsl_t
     return type->is_scalar();
  }
  
 +bool
 +glsl_type_is_vector_or_scalar(const struct glsl_type *type)
 +{
 +   return type->is_vector() || type->is_scalar();
 +}
 +
  bool
  glsl_type_is_matrix(const struct glsl_type *type)
  {
     return type->is_matrix();
  }
  
 +bool
 +glsl_type_is_array(const struct glsl_type *type)
 +{
 +   return type->is_array();
 +}
 +
 +bool
 +glsl_type_is_struct(const struct glsl_type *type)
 +{
 +   return type->is_record() || type->is_interface();
 +}
 +
 +bool
 +glsl_type_is_sampler(const struct glsl_type *type)
 +{
 +   return type->is_sampler();
 +}
 +
 +bool
 +glsl_sampler_type_is_shadow(const struct glsl_type *type)
 +{
 +   assert(glsl_type_is_sampler(type));
 +   return type->sampler_shadow;
 +}
 +
 +bool
 +glsl_sampler_type_is_array(const struct glsl_type *type)
 +{
 +   assert(glsl_type_is_sampler(type));
 +   return type->sampler_array;
 +}
 +
  const glsl_type *
  glsl_void_type(void)
  {
@@@ -213,9 -156,9 +220,9 @@@ glsl_float_type(void
  }
  
  const glsl_type *
 -glsl_vec4_type(void)
 +glsl_int_type(void)
  {
 -   return glsl_type::vec4_type;
 +   return glsl_type::int_type;
  }
  
  const glsl_type *
@@@ -224,68 -167,8 +231,68 @@@ glsl_uint_type(void
     return glsl_type::uint_type;
  }
  
 +const glsl_type *
 +glsl_bool_type(void)
 +{
 +   return glsl_type::bool_type;
 +}
 +
 +const glsl_type *
 +glsl_vec4_type(void)
 +{
 +   return glsl_type::vec4_type;
 +}
 +
 +const glsl_type *
 +glsl_scalar_type(enum glsl_base_type base_type)
 +{
 +   return glsl_type::get_instance(base_type, 1, 1);
 +}
 +
 +const glsl_type *
 +glsl_vector_type(enum glsl_base_type base_type, unsigned components)
 +{
 +   assert(components > 1 && components <= 4);
 +   return glsl_type::get_instance(base_type, components, 1);
 +}
 +
 +const glsl_type *
 +glsl_matrix_type(enum glsl_base_type base_type, unsigned rows, unsigned columns)
 +{
 +   assert(rows > 1 && rows <= 4 && columns >= 1 && columns <= 4);
 +   return glsl_type::get_instance(base_type, rows, columns);
 +}
 +
  const glsl_type *
  glsl_array_type(const glsl_type *base, unsigned elements)
  {
     return glsl_type::get_array_instance(base, elements);
  }
 +
 +const glsl_type *
 +glsl_struct_type(const glsl_struct_field *fields,
 +                 unsigned num_fields, const char *name)
 +{
 +   return glsl_type::get_record_instance(fields, num_fields, name);
 +}
 +
 +const struct glsl_type *
 +glsl_sampler_type(enum glsl_sampler_dim dim, bool is_shadow, bool is_array,
 +                  enum glsl_base_type base_type)
 +{
 +   return glsl_type::get_sampler_instance(dim, is_shadow, is_array, base_type);
 +}
 +
 +const glsl_type *
 +glsl_function_type(const glsl_type *return_type,
 +                   const glsl_function_param *params, unsigned num_params)
 +{
 +   return glsl_type::get_function_instance(return_type, params, num_params);
 +}
 +
 +const glsl_type *
 +glsl_transposed_type(const struct glsl_type *type)
 +{
 +   return glsl_type::get_instance(type->base_type, type->matrix_columns,
 +                                  type->vector_elements);
 +                                  type->vector_elements);
 +}
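A quick illustration of the new C wrappers (caller context assumed, not code from this series):

   /* Build a few glsl_type instances from C, e.g. inside a NIR pass: */
   const struct glsl_type *v3   = glsl_vector_type(GLSL_TYPE_FLOAT, 3);
   const struct glsl_type *m3   = glsl_matrix_type(GLSL_TYPE_FLOAT, 3, 3);
   const struct glsl_type *arr8 = glsl_array_type(v3, 8);   /* vec3[8] */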
diff --combined src/glsl/nir/nir_types.h
index a2fa7934e16c5125235004e47daf3d1bb7d61e86,49d6a65e7c4c64ddfd3e1793229365652d972291..1a0cb1fb77482b72f8abb8387f2dad44cc35ed93
@@@ -27,6 -27,8 +27,8 @@@
  
  #pragma once
  
+ #include <stdio.h>
  /* C wrapper around glsl_types.h */
  
  #include "../glsl_types.h"
@@@ -37,8 -39,6 +39,6 @@@ extern "C" 
  struct glsl_type;
  #endif
  
- #include <stdio.h>
  void glsl_print_type(const struct glsl_type *type, FILE *fp);
  void glsl_print_struct(const struct glsl_type *type, FILE *fp);
  
@@@ -49,12 -49,6 +49,12 @@@ const struct glsl_type *glsl_get_array_
  
  const struct glsl_type *glsl_get_column_type(const struct glsl_type *type);
  
 +const struct glsl_type *
 +glsl_get_function_return_type(const struct glsl_type *type);
 +
 +const struct glsl_function_param *
 +glsl_get_function_param(const struct glsl_type *type, unsigned index);
 +
  enum glsl_base_type glsl_get_base_type(const struct glsl_type *type);
  
  unsigned glsl_get_vector_elements(const struct glsl_type *type);
@@@ -68,44 -62,20 +68,47 @@@ unsigned glsl_get_length(const struct g
  const char *glsl_get_struct_elem_name(const struct glsl_type *type,
                                        unsigned index);
  
 +enum glsl_sampler_dim glsl_get_sampler_dim(const struct glsl_type *type);
 +enum glsl_base_type glsl_get_sampler_result_type(const struct glsl_type *type);
 +
+ unsigned glsl_get_record_location_offset(const struct glsl_type *type,
+                                          unsigned length);
  bool glsl_type_is_void(const struct glsl_type *type);
  bool glsl_type_is_vector(const struct glsl_type *type);
  bool glsl_type_is_scalar(const struct glsl_type *type);
 +bool glsl_type_is_vector_or_scalar(const struct glsl_type *type);
  bool glsl_type_is_matrix(const struct glsl_type *type);
 +bool glsl_type_is_array(const struct glsl_type *type);
 +bool glsl_type_is_struct(const struct glsl_type *type);
 +bool glsl_type_is_sampler(const struct glsl_type *type);
 +bool glsl_sampler_type_is_shadow(const struct glsl_type *type);
 +bool glsl_sampler_type_is_array(const struct glsl_type *type);
  
  const struct glsl_type *glsl_void_type(void);
  const struct glsl_type *glsl_float_type(void);
 -const struct glsl_type *glsl_vec4_type(void);
 +const struct glsl_type *glsl_int_type(void);
  const struct glsl_type *glsl_uint_type(void);
 +const struct glsl_type *glsl_bool_type(void);
 +
 +const struct glsl_type *glsl_vec4_type(void);
 +const struct glsl_type *glsl_scalar_type(enum glsl_base_type base_type);
 +const struct glsl_type *glsl_vector_type(enum glsl_base_type base_type,
 +                                         unsigned components);
 +const struct glsl_type *glsl_matrix_type(enum glsl_base_type base_type,
 +                                         unsigned rows, unsigned columns);
  const struct glsl_type *glsl_array_type(const struct glsl_type *base,
                                          unsigned elements);
 +const struct glsl_type *glsl_struct_type(const struct glsl_struct_field *fields,
 +                                         unsigned num_fields, const char *name);
 +const struct glsl_type *glsl_sampler_type(enum glsl_sampler_dim dim,
 +                                          bool is_shadow, bool is_array,
 +                                          enum glsl_base_type base_type);
 +const struct glsl_type * glsl_function_type(const struct glsl_type *return_type,
 +                                            const struct glsl_function_param *params,
 +                                            unsigned num_params);
 +
 +const struct glsl_type *glsl_transposed_type(const struct glsl_type *type);
  
  #ifdef __cplusplus
  }
index 6ff9553d6fe2f48ad470d611d97e6faaf9908984,ea9334fd7b7f403323ed98c988e83af4ee286027..1af50d6b68de6d0648e1e98788b34fed13b0bcc4
  #include "util/ralloc.h"
  #include "util/strtod.h"
  
 +extern "C" void
 +_mesa_error_no_memory(const char *caller)
 +{
 +   fprintf(stderr, "Mesa error: out of memory in %s\n", caller);
 +}
 +
  void
  _mesa_warning(struct gl_context *ctx, const char *fmt, ...)
  {
@@@ -68,7 -62,7 +68,7 @@@ _mesa_reference_shader(struct gl_contex
  }
  
  void
- _mesa_shader_debug(struct gl_context *, GLenum, GLuint *id,
+ _mesa_shader_debug(struct gl_context *, GLenum, GLuint *,
                     const char *, int)
  {
  }
@@@ -107,7 -101,7 +107,7 @@@ _mesa_clear_shader_program_data(struct 
  
     ralloc_free(shProg->UniformBlocks);
     shProg->UniformBlocks = NULL;
-    shProg->NumUniformBlocks = 0;
+    shProg->NumBufferInterfaceBlocks = 0;
     for (i = 0; i < MESA_SHADER_STAGES; i++) {
        ralloc_free(shProg->UniformBlockStageIndex[i]);
        shProg->UniformBlockStageIndex[i] = NULL;
index 49ff428b13a2eb1cc47a0c6bd53baf427e9b6fc1,8b790fe0bca18fa76c7366d70bf65d455116e3d2..2479182e37049ccafebf061bfcf5de013dba8604
@@@ -194,7 -194,6 +194,6 @@@ enum brw_state_id 
     BRW_STATE_GS_CONSTBUF,
     BRW_STATE_PROGRAM_CACHE,
     BRW_STATE_STATE_BASE_ADDRESS,
-    BRW_STATE_VUE_MAP_VS,
     BRW_STATE_VUE_MAP_GEOM_OUT,
     BRW_STATE_TRANSFORM_FEEDBACK,
     BRW_STATE_RASTERIZER_DISCARD,
     BRW_STATE_SAMPLER_STATE_TABLE,
     BRW_STATE_VS_ATTRIB_WORKAROUNDS,
     BRW_STATE_COMPUTE_PROGRAM,
+    BRW_STATE_CS_WORK_GROUPS,
     BRW_NUM_STATE_BITS
  };
  
  #define BRW_NEW_GS_CONSTBUF             (1ull << BRW_STATE_GS_CONSTBUF)
  #define BRW_NEW_PROGRAM_CACHE           (1ull << BRW_STATE_PROGRAM_CACHE)
  #define BRW_NEW_STATE_BASE_ADDRESS      (1ull << BRW_STATE_STATE_BASE_ADDRESS)
- #define BRW_NEW_VUE_MAP_VS              (1ull << BRW_STATE_VUE_MAP_VS)
  #define BRW_NEW_VUE_MAP_GEOM_OUT        (1ull << BRW_STATE_VUE_MAP_GEOM_OUT)
  #define BRW_NEW_TRANSFORM_FEEDBACK      (1ull << BRW_STATE_TRANSFORM_FEEDBACK)
  #define BRW_NEW_RASTERIZER_DISCARD      (1ull << BRW_STATE_RASTERIZER_DISCARD)
  #define BRW_NEW_SAMPLER_STATE_TABLE     (1ull << BRW_STATE_SAMPLER_STATE_TABLE)
  #define BRW_NEW_VS_ATTRIB_WORKAROUNDS   (1ull << BRW_STATE_VS_ATTRIB_WORKAROUNDS)
  #define BRW_NEW_COMPUTE_PROGRAM         (1ull << BRW_STATE_COMPUTE_PROGRAM)
+ #define BRW_NEW_CS_WORK_GROUPS          (1ull << BRW_STATE_CS_WORK_GROUPS)
  
  struct brw_state_flags {
     /** State update flags signalled by mesa internals */
@@@ -361,12 -361,6 +361,12 @@@ struct brw_stage_prog_data 
        /** @} */
     } binding_table;
  
 +   uint32_t *map_entries;
 +   struct {
 +      uint32_t index_count;
 +      uint32_t *index;
 +   } bind_map[8]; /* MAX_SETS from vulkan/private.h */
 +
     GLuint nr_params;       /**< number of float params/constants */
     GLuint nr_pull_params;
     unsigned nr_image_params;
@@@ -504,6 -498,16 +504,16 @@@ struct brw_cs_prog_data 
     GLuint dispatch_grf_start_reg_16;
     unsigned local_size[3];
     unsigned simd_size;
+    bool uses_barrier;
+    bool uses_num_work_groups;
+    struct {
+       /** @{
+        * surface indices the CS-specific surfaces
+        * surface indices for the CS-specific surfaces
+       uint32_t work_groups_start;
+       /** @} */
+    } binding_table;
  };
  
  /**
@@@ -545,6 -549,17 +555,17 @@@ struct brw_vue_map 
      */
     GLbitfield64 slots_valid;
  
+    /**
+     * Is this VUE map for a separate shader pipeline?
+     *
+     * Separable programs (GL_ARB_separate_shader_objects) can be mixed and matched
+     * without the linker having a chance to dead code eliminate unused varyings.
+     *
+     * This means that we have to use a fixed slot layout, based on the output's
+     * location field, rather than assigning slots in a compact contiguous block.
+     */
+    bool separate;
     /**
      * Map from gl_varying_slot value to VUE slot.  For gl_varying_slots that are
      * not stored in a slot (because they are not written, or because
@@@ -590,7 -605,8 +611,8 @@@ static inline GLuint brw_varying_to_off
  
  void brw_compute_vue_map(const struct brw_device_info *devinfo,
                           struct brw_vue_map *vue_map,
-                          GLbitfield64 slots_valid);
+                          GLbitfield64 slots_valid,
+                          bool separate_shader);
  
  
  /**
@@@ -753,7 -769,8 +775,8 @@@ struct brw_vs_prog_data 
                              12 + /* ubo */                              \
                              BRW_MAX_ABO +                               \
                              BRW_MAX_IMAGES +                            \
-                             2 /* shader time, pull constants */)
+                             2 + /* shader time, pull constants */       \
+                             1 /* cs num work groups */)
  
  #define SURF_INDEX_GEN6_SOL_BINDING(t) (t)
  
@@@ -787,6 -804,11 +810,11 @@@ struct brw_gs_prog_dat
  
     bool include_primitive_id;
  
+    /**
+     * The number of vertices emitted, if constant - otherwise -1.
+     */
+    int static_vertex_count;
     int invocations;
  
     /**
@@@ -1241,6 -1263,17 +1269,17 @@@ struct brw_contex
        uint32_t draw_params_offset;
     } draw;
  
+    struct {
+       /**
+        * For gl_NumWorkGroups: If num_work_groups_bo is non-NULL, then this is
+        * an indirect dispatch and num_work_groups_offset is valid. Otherwise,
+        * num_work_groups is set based on glDispatchCompute.
+        */
+       drm_intel_bo *num_work_groups_bo;
+       GLintptr num_work_groups_offset;
+       const GLuint *num_work_groups;
+    } compute;
     struct {
        struct brw_vertex_element inputs[VERT_ATTRIB_MAX];
        struct brw_vertex_buffer buffers[VERT_ATTRIB_MAX];
        GLuint curbe_offset;
     } curbe;
  
-    /**
-     * Layout of vertex data exiting the vertex shader.
-     *
-     * BRW_NEW_VUE_MAP_VS is flagged when this VUE map changes.
-     */
-    struct brw_vue_map vue_map_vs;
     /**
      * Layout of vertex data exiting the geometry portion of the pipeline.
-     * This comes from the geometry shader if one exists, otherwise from the
-     * vertex shader.
+     * This comes from the last enabled shader stage (GS, DS, or VS).
      *
      * BRW_NEW_VUE_MAP_GEOM_OUT is flagged when the VUE map changes.
      */
  
     int num_atoms[BRW_NUM_PIPELINES];
     const struct brw_tracked_state render_atoms[60];
-    const struct brw_tracked_state compute_atoms[4];
+    const struct brw_tracked_state compute_atoms[7];
  
     /* If (INTEL_DEBUG & DEBUG_BATCH) */
     struct {
@@@ -1784,6 -1809,12 +1815,12 @@@ void brw_create_constant_surface(struc
                                   uint32_t size,
                                   uint32_t *out_offset,
                                   bool dword_pitch);
+ void brw_create_buffer_surface(struct brw_context *brw,
+                                drm_intel_bo *bo,
+                                uint32_t offset,
+                                uint32_t size,
+                                uint32_t *out_offset,
+                                bool dword_pitch);
  void brw_update_buffer_texture_surface(struct gl_context *ctx,
                                         unsigned unit,
                                         uint32_t *surf_offset);
@@@ -2063,11 -2094,6 +2100,6 @@@ void gen8_hiz_exec(struct brw_context *
  
  uint32_t get_hw_prim_for_gl_prim(int mode);
  
- void
- brw_setup_vue_key_clip_info(struct brw_context *brw,
-                             struct brw_vue_prog_key *key,
-                             bool program_uses_clip_distance);
  void
  gen6_upload_push_constants(struct brw_context *brw,
                             const struct gl_program *prog,
index f9dcdc735b338c991e0f094e570c4c83037455b5,393f17ac98c6abb9c8a1e27707d920b3b1798e5a..a8cde20e04583b9f847d577142fb074d92e42016
@@@ -57,7 -57,6 +57,7 @@@
  # define GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL (0 << 8)
  # define GEN7_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM     (1 << 8)
  
 +#ifndef _3DPRIM_POINTLIST /* FIXME: Avoid clashing with defines from bdw_pack.h */
  #define _3DPRIM_POINTLIST         0x01
  #define _3DPRIM_LINELIST          0x02
  #define _3DPRIM_LINESTRIP         0x03
  #define _3DPRIM_TRIFAN            0x06
  #define _3DPRIM_QUADLIST          0x07
  #define _3DPRIM_QUADSTRIP         0x08
- #define _3DPRIM_LINELIST_ADJ      0x09
- #define _3DPRIM_LINESTRIP_ADJ     0x0A
- #define _3DPRIM_TRILIST_ADJ       0x0B
- #define _3DPRIM_TRISTRIP_ADJ      0x0C
+ #define _3DPRIM_LINELIST_ADJ      0x09 /* G45+ */
+ #define _3DPRIM_LINESTRIP_ADJ     0x0A /* G45+ */
+ #define _3DPRIM_TRILIST_ADJ       0x0B /* G45+ */
+ #define _3DPRIM_TRISTRIP_ADJ      0x0C /* G45+ */
  #define _3DPRIM_TRISTRIP_REVERSE  0x0D
  #define _3DPRIM_POLYGON           0x0E
  #define _3DPRIM_RECTLIST          0x0F
@@@ -78,8 -77,7 +78,8 @@@
  #define _3DPRIM_LINESTRIP_CONT    0x12
  #define _3DPRIM_LINESTRIP_BF      0x13
  #define _3DPRIM_LINESTRIP_CONT_BF 0x14
- #define _3DPRIM_TRIFAN_NOSTIPPLE  0x15
+ #define _3DPRIM_TRIFAN_NOSTIPPLE  0x16
 +#endif
  
  /* We use this offset to be able to pass native primitive types in struct
   * _mesa_prim::mode.  Native primitive types are BRW_PRIM_OFFSET +
@@@ -981,6 -979,7 +981,7 @@@ enum opcode 
     SHADER_OPCODE_TG4_LOGICAL,
     SHADER_OPCODE_TG4_OFFSET,
     SHADER_OPCODE_TG4_OFFSET_LOGICAL,
+    SHADER_OPCODE_SAMPLEINFO,
  
     /**
      * Combines multiple sources of size 1 into a larger virtual GRF.
     FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7,
     FS_OPCODE_VARYING_PULL_CONSTANT_LOAD,
     FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7,
+    FS_OPCODE_GET_BUFFER_SIZE,
     FS_OPCODE_MOV_DISPATCH_TO_FLAGS,
     FS_OPCODE_DISCARD_JUMP,
     FS_OPCODE_SET_SAMPLE_ID,
     VS_OPCODE_PULL_CONSTANT_LOAD,
     VS_OPCODE_PULL_CONSTANT_LOAD_GEN7,
     VS_OPCODE_SET_SIMD4X2_HEADER_GEN9,
+    VS_OPCODE_GET_BUFFER_SIZE,
     VS_OPCODE_UNPACK_FLAGS_SIMD4X2,
  
     /**
@@@ -1513,6 -1516,7 +1518,7 @@@ enum brw_message_target 
  #define GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4      8
  #define GEN5_SAMPLER_MESSAGE_LOD                 9
  #define GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO      10
+ #define GEN6_SAMPLER_MESSAGE_SAMPLE_SAMPLEINFO   11
  #define GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_C    16
  #define GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO   17
  #define GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_C 18
  
  #define BRW_URB_OPCODE_WRITE_HWORD  0
  #define BRW_URB_OPCODE_WRITE_OWORD  1
- #define GEN8_URB_OPCODE_SIMD8_WRITE  7
+ #define BRW_URB_OPCODE_READ_HWORD   2
+ #define BRW_URB_OPCODE_READ_OWORD   3
+ #define GEN7_URB_OPCODE_ATOMIC_MOV  4
+ #define GEN7_URB_OPCODE_ATOMIC_INC  5
+ #define GEN8_URB_OPCODE_ATOMIC_ADD  6
+ #define GEN8_URB_OPCODE_SIMD8_WRITE 7
+ #define GEN8_URB_OPCODE_SIMD8_READ  8
  
  #define BRW_URB_SWIZZLE_NONE          0
  #define BRW_URB_SWIZZLE_INTERLEAVE    1
  /* DW3: PS */
  
  #define _3DSTATE_SAMPLER_STATE_POINTERS_VS    0x782B /* GEN7+ */
+ #define _3DSTATE_SAMPLER_STATE_POINTERS_HS    0x782C /* GEN7+ */
+ #define _3DSTATE_SAMPLER_STATE_POINTERS_DS    0x782D /* GEN7+ */
  #define _3DSTATE_SAMPLER_STATE_POINTERS_GS    0x782E /* GEN7+ */
  #define _3DSTATE_SAMPLER_STATE_POINTERS_PS    0x782F /* GEN7+ */
  
  #define GEN6_MAX_GS_URB_ENTRY_SIZE_BYTES                (5*128)
  
  #define _3DSTATE_PUSH_CONSTANT_ALLOC_VS         0x7912 /* GEN7+ */
+ #define _3DSTATE_PUSH_CONSTANT_ALLOC_HS         0x7913 /* GEN7+ */
+ #define _3DSTATE_PUSH_CONSTANT_ALLOC_DS         0x7914 /* GEN7+ */
  #define _3DSTATE_PUSH_CONSTANT_ALLOC_GS         0x7915 /* GEN7+ */
  #define _3DSTATE_PUSH_CONSTANT_ALLOC_PS         0x7916 /* GEN7+ */
  # define GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT         16
  # define GEN6_GS_SVBI_POSTINCREMENT_VALUE_MASK                INTEL_MASK(25, 16)
  # define GEN6_GS_ENABLE                                       (1 << 15)
  
+ /* Gen8+ DW8 */
+ # define GEN8_GS_STATIC_OUTPUT                          (1 << 30)
+ # define GEN8_GS_STATIC_VERTEX_COUNT_SHIFT              16
+ # define GEN8_GS_STATIC_VERTEX_COUNT_MASK               INTEL_MASK(26, 16)
  /* Gen8+ DW9 */
  # define GEN8_GS_URB_ENTRY_OUTPUT_OFFSET_SHIFT          21
  # define GEN8_GS_URB_OUTPUT_LENGTH_SHIFT                16
  #define GEN7_MAX_GS_OUTPUT_VERTEX_SIZE_BYTES          (62*16)
  
  #define _3DSTATE_HS                             0x781B /* GEN7+ */
+ /* DW1 */
+ # define GEN7_HS_SAMPLER_COUNT_MASK                     INTEL_MASK(29, 27)
+ # define GEN7_HS_SAMPLER_COUNT_SHIFT                    27
+ # define GEN7_HS_BINDING_TABLE_ENTRY_COUNT_MASK         INTEL_MASK(25, 18)
+ # define GEN7_HS_BINDING_TABLE_ENTRY_COUNT_SHIFT        18
+ # define GEN7_HS_FLOATING_POINT_MODE_IEEE_754           (0 << 16)
+ # define GEN7_HS_FLOATING_POINT_MODE_ALT                (1 << 16)
+ # define GEN7_HS_MAX_THREADS_SHIFT                      0
+ /* DW2 */
+ # define GEN7_HS_ENABLE                                 (1 << 31)
+ # define GEN7_HS_STATISTICS_ENABLE                      (1 << 29)
+ # define GEN8_HS_MAX_THREADS_SHIFT                      8
+ # define GEN7_HS_INSTANCE_COUNT_MASK                    INTEL_MASK(3, 0)
+ # define GEN7_HS_INSTANCE_COUNT_SHIFT                   0
+ /* DW5 */
+ # define GEN7_HS_SINGLE_PROGRAM_FLOW                    (1 << 27)
+ # define GEN7_HS_VECTOR_MASK_ENABLE                     (1 << 26)
+ # define HSW_HS_ACCESSES_UAV                            (1 << 25)
+ # define GEN7_HS_INCLUDE_VERTEX_HANDLES                 (1 << 24)
+ # define GEN7_HS_DISPATCH_START_GRF_MASK                INTEL_MASK(23, 19)
+ # define GEN7_HS_DISPATCH_START_GRF_SHIFT               19
+ # define GEN7_HS_URB_READ_LENGTH_MASK                   INTEL_MASK(16, 11)
+ # define GEN7_HS_URB_READ_LENGTH_SHIFT                  11
+ # define GEN7_HS_URB_ENTRY_READ_OFFSET_MASK             INTEL_MASK(9, 4)
+ # define GEN7_HS_URB_ENTRY_READ_OFFSET_SHIFT            4
  #define _3DSTATE_TE                             0x781C /* GEN7+ */
+ /* DW1 */
+ # define GEN7_TE_PARTITIONING_SHIFT                     12
+ # define GEN7_TE_OUTPUT_TOPOLOGY_SHIFT                  8
+ # define GEN7_TE_DOMAIN_SHIFT                           4
+ //# define GEN7_TE_MODE_SW                                (1 << 1)
+ # define GEN7_TE_ENABLE                                 (1 << 0)
  #define _3DSTATE_DS                             0x781D /* GEN7+ */
+ /* DW2 */
+ # define GEN7_DS_SINGLE_DOMAIN_POINT_DISPATCH           (1 << 31)
+ # define GEN7_DS_VECTOR_MASK_ENABLE                     (1 << 30)
+ # define GEN7_DS_SAMPLER_COUNT_MASK                     INTEL_MASK(29, 27)
+ # define GEN7_DS_SAMPLER_COUNT_SHIFT                    27
+ # define GEN7_DS_BINDING_TABLE_ENTRY_COUNT_MASK         INTEL_MASK(25, 18)
+ # define GEN7_DS_BINDING_TABLE_ENTRY_COUNT_SHIFT        18
+ # define GEN7_DS_FLOATING_POINT_MODE_IEEE_754           (0 << 16)
+ # define GEN7_DS_FLOATING_POINT_MODE_ALT                (1 << 16)
+ # define HSW_DS_ACCESSES_UAV                            (1 << 14)
+ /* DW4 */
+ # define GEN7_DS_DISPATCH_START_GRF_MASK                INTEL_MASK(24, 20)
+ # define GEN7_DS_DISPATCH_START_GRF_SHIFT               20
+ # define GEN7_DS_URB_READ_LENGTH_MASK                   INTEL_MASK(17, 11)
+ # define GEN7_DS_URB_READ_LENGTH_SHIFT                  11
+ # define GEN7_DS_URB_ENTRY_READ_OFFSET_MASK             INTEL_MASK(9, 4)
+ # define GEN7_DS_URB_ENTRY_READ_OFFSET_SHIFT            4
+ /* DW5 */
+ # define GEN7_DS_MAX_THREADS_SHIFT                      25
+ # define HSW_DS_MAX_THREADS_SHIFT                       21
+ # define GEN7_DS_STATISTICS_ENABLE                      (1 << 10)
+ # define GEN7_DS_SIMD8_DISPATCH_ENABLE                  (1 << 3)
+ # define GEN7_DS_COMPUTE_W_COORDINATE_ENABLE            (1 << 2)
+ # define GEN7_DS_CACHE_DISABLE                          (1 << 1)
+ # define GEN7_DS_ENABLE                                 (1 << 0)
+ /* Gen8+ DW8 */
+ # define GEN8_DS_URB_ENTRY_OUTPUT_OFFSET_MASK           INTEL_MASK(26, 21)
+ # define GEN8_DS_URB_ENTRY_OUTPUT_OFFSET_SHIFT          21
+ # define GEN8_DS_URB_OUTPUT_LENGTH_MASK                 INTEL_MASK(20, 16)
+ # define GEN8_DS_URB_OUTPUT_LENGTH_SHIFT                16
+ # define GEN8_DS_USER_CLIP_DISTANCE_MASK                INTEL_MASK(15, 8)
+ # define GEN8_DS_USER_CLIP_DISTANCE_SHIFT               8
+ # define GEN8_DS_USER_CULL_DISTANCE_MASK                INTEL_MASK(7, 0)
+ # define GEN8_DS_USER_CULL_DISTANCE_SHIFT               0
  
  #define _3DSTATE_CLIP                         0x7812 /* GEN6+ */
  /* DW1 */
@@@ -2268,6 -2355,21 +2357,21 @@@ enum brw_pixel_shader_computed_depth_mo
     BRW_PSCDEPTH_ON_LE = 3, /* PS guarantees output depth <= source depth */
  };
  
+ enum brw_pixel_shader_coverage_mask_mode {
+    BRW_PSICMS_OFF     = 0, /* PS does not use input coverage masks. */
+    BRW_PSICMS_NORMAL  = 1, /* Input Coverage masks based on outer conservatism
+                             * and factors in SAMPLE_MASK.  If Pixel is
+                             * conservatively covered, all samples are enabled.
+                             */
+    BRW_PSICMS_INNER   = 2, /* Input Coverage masks based on inner conservatism
+                             * and factors in SAMPLE_MASK.  If Pixel is
+                             * conservatively *FULLY* covered, all samples are
+                             * enabled.
+                             */
+    BRW_PCICMS_DEPTH   = 3,
+ };
  #define _3DSTATE_PS_EXTRA                       0x784F /* GEN8+ */
  /* DW1 */
  # define GEN8_PSX_PIXEL_SHADER_VALID                    (1 << 31)
  # define GEN9_PSX_SHADER_PULLS_BARY                     (1 << 3)
  # define GEN8_PSX_SHADER_HAS_UAV                        (1 << 2)
  # define GEN8_PSX_SHADER_USES_INPUT_COVERAGE_MASK       (1 << 1)
+ # define GEN9_PSX_SHADER_NORMAL_COVERAGE_MASK_SHIFT     0
  
  enum brw_wm_barycentric_interp_mode {
     BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC               = 0,
  # define MEDIA_VFE_STATE_CURBE_ALLOC_SHIFT      0
  # define MEDIA_VFE_STATE_CURBE_ALLOC_MASK       INTEL_MASK(15, 0)
  
+ #define MEDIA_CURBE_LOAD                        0x7001
  #define MEDIA_INTERFACE_DESCRIPTOR_LOAD         0x7002
+ /* GEN7 DW4, GEN8+ DW5 */
+ # define MEDIA_CURBE_READ_LENGTH_SHIFT          16
+ # define MEDIA_CURBE_READ_LENGTH_MASK           INTEL_MASK(31, 16)
+ # define MEDIA_CURBE_READ_OFFSET_SHIFT          0
+ # define MEDIA_CURBE_READ_OFFSET_MASK           INTEL_MASK(15, 0)
  /* GEN7 DW5, GEN8+ DW6 */
+ # define MEDIA_BARRIER_ENABLE_SHIFT             21
+ # define MEDIA_BARRIER_ENABLE_MASK              INTEL_MASK(21, 21)
  # define MEDIA_GPGPU_THREAD_COUNT_SHIFT         0
  # define MEDIA_GPGPU_THREAD_COUNT_MASK          INTEL_MASK(7, 0)
  # define GEN8_MEDIA_GPGPU_THREAD_COUNT_SHIFT    0
  # define GEN8_MEDIA_GPGPU_THREAD_COUNT_MASK     INTEL_MASK(9, 0)
  #define MEDIA_STATE_FLUSH                       0x7004
  #define GPGPU_WALKER                            0x7105
+ /* GEN7 DW0 */
+ # define GEN7_GPGPU_INDIRECT_PARAMETER_ENABLE   (1 << 10)
  /* GEN8+ DW2 */
  # define GPGPU_WALKER_INDIRECT_LENGTH_SHIFT     0
  # define GPGPU_WALKER_INDIRECT_LENGTH_MASK      INTEL_MASK(15, 0)
index 16c125d07eebc3e8757a01c344400a8463e699c6,a6a3bb670cae2e4a316a9bcc9b7e51ce26171ad1..65172490da317f34f3898d895589e7047e29e690
@@@ -314,7 -314,7 +314,7 @@@ static const struct brw_device_info brw
     .max_wm_threads = 64 * 6,                        \
     .max_cs_threads = 56,                            \
     .urb = {                                         \
-       .size = 192,                                  \
+       .size = 384,                                  \
        .min_vs_entries = 64,                         \
        .max_vs_entries = 1856,                       \
        .max_hs_entries = 672,                        \
  
  static const struct brw_device_info brw_device_info_skl_gt1 = {
     GEN9_FEATURES, .gt = 1,
+    .urb.size = 192,
  };
  
  static const struct brw_device_info brw_device_info_skl_gt2 = {
@@@ -373,15 -374,3 +374,15 @@@ brw_get_device_info(int devid, int revi
  
     return devinfo;
  }
 +
 +const char *
 +brw_get_device_name(int devid)
 +{
 +   switch (devid) {
 +#undef CHIPSET
 +#define CHIPSET(id, family, name) case id: return name;
 +#include "pci_ids/i965_pci_ids.h"
 +   default:
 +      return NULL;
 +   }
 +}
index 76530a476d6be39a35c562f088c774dfacb592ba,64215ae5a6aa8344111691a9ca7145eec65b9f41..e620301fde7e16f89bfe9995370092cb64649a17
@@@ -42,6 -42,7 +42,7 @@@
  #include "brw_eu.h"
  #include "brw_wm.h"
  #include "brw_fs.h"
+ #include "brw_cs.h"
  #include "brw_cfg.h"
  #include "brw_dead_control_flow.h"
  #include "main/uniforms.h"
@@@ -491,7 -492,6 +492,7 @@@ type_size_scalar(const struct glsl_typ
     case GLSL_TYPE_ERROR:
     case GLSL_TYPE_INTERFACE:
     case GLSL_TYPE_DOUBLE:
 +   case GLSL_TYPE_FUNCTION:
        unreachable("not reached");
     }
  
@@@ -797,6 -797,7 +798,7 @@@ fs_inst::regs_read(int arg) cons
        break;
  
     case CS_OPCODE_CS_TERMINATE:
+    case SHADER_OPCODE_BARRIER:
        return 1;
  
     default:
@@@ -878,9 -879,11 +880,11 @@@ fs_visitor::implied_mrf_writes(fs_inst 
     case SHADER_OPCODE_TXL:
     case SHADER_OPCODE_TXS:
     case SHADER_OPCODE_LOD:
+    case SHADER_OPCODE_SAMPLEINFO:
        return 1;
     case FS_OPCODE_FB_WRITE:
        return 2;
+    case FS_OPCODE_GET_BUFFER_SIZE:
     case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
     case SHADER_OPCODE_GEN4_SCRATCH_READ:
        return 1;
@@@ -1394,6 -1397,9 +1398,9 @@@ fs_visitor::assign_curb_setup(
         }
        }
     }
+    /* This may be updated in assign_urb_setup or assign_vs_urb_setup. */
+    this->first_non_payload_grf = payload.num_regs + prog_data->curb_read_length;
  }
  
  void
@@@ -1434,7 -1440,8 +1441,8 @@@ fs_visitor::calculate_urb_setup(
            */
           struct brw_vue_map prev_stage_vue_map;
           brw_compute_vue_map(devinfo, &prev_stage_vue_map,
-                              key->input_slots_valid);
+                              key->input_slots_valid,
+                              shader_prog->SeparateShader);
           int first_slot = 2 * BRW_SF_URB_ENTRY_READ_OFFSET;
           assert(prev_stage_vue_map.num_slots <= first_slot + 32);
           for (int slot = first_slot; slot < prev_stage_vue_map.num_slots;
@@@ -1508,8 -1515,7 +1516,7 @@@ fs_visitor::assign_urb_setup(
     }
  
     /* Each attribute is 4 setup channels, each of which is half a reg. */
-    this->first_non_payload_grf =
-       urb_start + prog_data->num_varying_inputs * 2;
+    this->first_non_payload_grf += prog_data->num_varying_inputs * 2;
  }
  
  void
@@@ -1524,16 -1530,11 +1531,15 @@@ fs_visitor::assign_vs_urb_setup(
        count++;
  
     /* Each attribute is 4 regs. */
-    this->first_non_payload_grf =
-       payload.num_regs + prog_data->curb_read_length + count * 4;
+    this->first_non_payload_grf += count * 4;
  
     unsigned vue_entries =
        MAX2(count, vs_prog_data->base.vue_map.num_slots);
  
 +   /* URB entry size is counted in units of 64 bytes (for the 3DSTATE_URB_VS
 +    * command).  Each attribute is 16 bytes (4 floats/dwords), so each unit
 +    * fits four attributes.
 +    */
     vs_prog_data->base.urb_entry_size = ALIGN(vue_entries, 4) / 4;
     vs_prog_data->base.urb_read_length = (count + 1) / 2;
  
  
              inst->src[i].file = HW_REG;
              inst->src[i].fixed_hw_reg =
-                retype(brw_vec8_grf(grf, 0), inst->src[i].type);
+                stride(byte_offset(retype(brw_vec8_grf(grf, 0), inst->src[i].type),
+                                   inst->src[i].subreg_offset),
+                       inst->exec_size * inst->src[i].stride,
+                       inst->exec_size, inst->src[i].stride);
           }
        }
     }
@@@ -2646,22 -2650,9 +2655,22 @@@ fs_visitor::emit_repclear_shader(
     brw_wm_prog_key *key = (brw_wm_prog_key*) this->key;
     int base_mrf = 1;
     int color_mrf = base_mrf + 2;
 +   fs_inst *mov;
  
 -   fs_inst *mov = bld.exec_all().MOV(vec4(brw_message_reg(color_mrf)),
 -                                     fs_reg(UNIFORM, 0, BRW_REGISTER_TYPE_F));
 +   if (uniforms == 1) {
 +      mov = bld.exec_all().MOV(vec4(brw_message_reg(color_mrf)),
 +                               fs_reg(UNIFORM, 0, BRW_REGISTER_TYPE_F));
 +   } else {
 +      struct brw_reg reg =
 +         brw_reg(BRW_GENERAL_REGISTER_FILE,
 +                 2, 3, 0, 0, BRW_REGISTER_TYPE_F,
 +                 BRW_VERTICAL_STRIDE_8,
 +                 BRW_WIDTH_2,
 +                 BRW_HORIZONTAL_STRIDE_4, BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
 +
 +      mov = bld.exec_all().MOV(vec4(brw_message_reg(color_mrf)),
 +                               fs_reg(reg));
 +   }
  
     fs_inst *write;
     if (key->nr_color_regions == 1) {
     assign_curb_setup();
  
     /* Now that we have the uniform assigned, go ahead and force it to a vec4. */
 -   assert(mov->src[0].file == HW_REG);
 -   mov->src[0] = brw_vec4_grf(mov->src[0].fixed_hw_reg.nr, 0);
 +   if (uniforms == 1) {
 +      assert(mov->src[0].file == HW_REG);
 +      mov->src[0] = brw_vec4_grf(mov->src[0].fixed_hw_reg.nr, 0);
 +   }
  }
  
  /**
@@@ -2811,7 -2800,7 +2820,7 @@@ fs_visitor::insert_gen4_pre_send_depend
  {
     int write_len = inst->regs_written;
     int first_write_grf = inst->dst.reg;
-    bool needs_dep[BRW_MAX_MRF];
+    bool needs_dep[BRW_MAX_MRF(devinfo->gen)];
     assert(write_len < (int)sizeof(needs_dep) - 1);
  
     memset(needs_dep, false, sizeof(needs_dep));
@@@ -2882,7 -2871,7 +2891,7 @@@ fs_visitor::insert_gen4_post_send_depen
  {
     int write_len = inst->regs_written;
     int first_write_grf = inst->dst.reg;
-    bool needs_dep[BRW_MAX_MRF];
+    bool needs_dep[BRW_MAX_MRF(devinfo->gen)];
     assert(write_len < (int)sizeof(needs_dep) - 1);
  
     memset(needs_dep, false, sizeof(needs_dep));
@@@ -3212,7 -3201,8 +3221,8 @@@ fs_visitor::lower_integer_multiplicatio
               * schedule multi-component multiplications much better.
               */
  
-             if (inst->conditional_mod && inst->dst.is_null()) {
+             fs_reg orig_dst = inst->dst;
+             if (orig_dst.is_null() || orig_dst.file == MRF) {
                 inst->dst = fs_reg(GRF, alloc.allocate(dispatch_width / 8),
                                    inst->dst.type);
              }
  
              ibld.ADD(dst, low, high);
  
-             if (inst->conditional_mod) {
-                fs_reg null(retype(ibld.null_reg_f(), inst->dst.type));
+             if (inst->conditional_mod || orig_dst.file == MRF) {
                 set_condmod(inst->conditional_mod,
-                            ibld.MOV(null, inst->dst));
+                            ibld.MOV(orig_dst, inst->dst));
              }
           }
  
@@@ -4749,10 -4738,19 +4758,19 @@@ fs_visitor::setup_cs_payload(
     assert(devinfo->gen >= 7);
  
     payload.num_regs = 1;
+    if (prog->SystemValuesRead & SYSTEM_BIT_LOCAL_INVOCATION_ID) {
+       const unsigned local_id_dwords =
+          brw_cs_prog_local_id_payload_dwords(prog, dispatch_width);
+       assert((local_id_dwords & 0x7) == 0);
+       const unsigned local_id_regs = local_id_dwords / 8;
+       payload.local_invocation_id_reg = payload.num_regs;
+       payload.num_regs += local_id_regs;
+    }
  }
  
  void
- fs_visitor::assign_binding_table_offsets()
+ fs_visitor::assign_fs_binding_table_offsets()
  {
     assert(stage == MESA_SHADER_FRAGMENT);
     brw_wm_prog_data *prog_data = (brw_wm_prog_data*) this->prog_data;
     assign_common_binding_table_offsets(next_binding_table_offset);
  }
  
+ void
+ fs_visitor::assign_cs_binding_table_offsets()
+ {
+    assert(stage == MESA_SHADER_COMPUTE);
+    brw_cs_prog_data *prog_data = (brw_cs_prog_data*) this->prog_data;
+    uint32_t next_binding_table_offset = 0;
+    /* May not be used if the gl_NumWorkGroups variable is not accessed. */
+    prog_data->binding_table.work_groups_start = next_binding_table_offset;
+    next_binding_table_offset++;
+    assign_common_binding_table_offsets(next_binding_table_offset);
+ }
  void
  fs_visitor::calculate_register_pressure()
  {
  void
  fs_visitor::optimize()
  {
+    /* Start by validating the shader we currently have. */
+    validate();
     /* bld is the common builder object pointing at the end of the program we
      * used to translate it into i965 IR.  For the optimization and lowering
      * passes coming next, any code added after the end of the program without
     assign_constant_locations();
     demote_pull_constants();
  
+    validate();
     split_virtual_grfs();
+    validate();
  
  #define OPT(pass, args...) ({                                           \
        pass_num++;                                                       \
           backend_shader::dump_instructions(filename);                   \
        }                                                                 \
                                                                          \
+       validate();                                                       \
+                                                                         \
        progress = progress || this_progress;                             \
        this_progress;                                                    \
     })
     OPT(lower_integer_multiplication);
  
     lower_uniform_pull_constant_loads();
+    validate();
  }
  
  /**
@@@ -4971,8 -4993,7 +5013,8 @@@ fs_visitor::run_vs(gl_clip_plane *clip_
  {
     assert(stage == MESA_SHADER_VERTEX);
  
 -   assign_common_binding_table_offsets(0);
 +   if (prog_data->map_entries == NULL)
 +      assign_common_binding_table_offsets(0);
     setup_vs_payload();
  
     if (shader_time_index >= 0)
@@@ -5011,8 -5032,9 +5053,10 @@@ fs_visitor::run_fs(bool do_rep_send
  
     assert(stage == MESA_SHADER_FRAGMENT);
  
 -   assign_fs_binding_table_offsets();
+    sanity_param_count = prog->Parameters->NumParameters;
-       assign_binding_table_offsets();
 +   if (prog_data->map_entries == NULL)
++      assign_fs_binding_table_offsets();
  
     if (devinfo->gen >= 6)
        setup_payload_gen6();
     else
        wm_prog_data->reg_blocks_16 = brw_register_blocks(grf_used);
  
 -   /* If any state parameters were appended, then ParameterValues could have
 -    * been realloced, in which case the driver uniform storage set up by
 -    * _mesa_associate_uniform_storage() would point to freed memory.  Make
 -    * sure that didn't happen.
 -    */
 -   assert(sanity_param_count == prog->Parameters->NumParameters);
 -
     return !failed;
  }
  
@@@ -5093,7 -5122,7 +5137,7 @@@ fs_visitor::run_cs(
  
     sanity_param_count = prog->Parameters->NumParameters;
  
-    assign_common_binding_table_offsets(0);
+    assign_cs_binding_table_offsets();
  
     setup_cs_payload();
  
@@@ -5141,20 -5170,11 +5185,11 @@@ brw_wm_fs_emit(struct brw_context *brw
                 struct gl_shader_program *prog,
                 unsigned *final_assembly_size)
  {
-    bool start_busy = false;
-    double start_time = 0;
-    if (unlikely(brw->perf_debug)) {
-       start_busy = (brw->batch.last_bo &&
-                     drm_intel_bo_busy(brw->batch.last_bo));
-       start_time = get_time();
-    }
     struct brw_shader *shader = NULL;
     if (prog)
        shader = (brw_shader *) prog->_LinkedShaders[MESA_SHADER_FRAGMENT];
  
 -   if (unlikely(INTEL_DEBUG & DEBUG_WM))
 +   if (unlikely(INTEL_DEBUG & DEBUG_WM) && shader->base.ir)
        brw_dump_ir("fragment", prog, &shader->base, &fp->Base);
  
     int st_index8 = -1, st_index16 = -1;
     if (simd16_cfg)
        prog_data->prog_offset_16 = g.generate_code(simd16_cfg, 16);
  
-    if (unlikely(brw->perf_debug) && shader) {
-       if (shader->compiled_once)
-          brw_wm_debug_recompile(brw, prog, key);
-       shader->compiled_once = true;
-       if (start_busy && !drm_intel_bo_busy(brw->batch.last_bo)) {
-          perf_debug("FS compile took %.03f ms and stalled the GPU\n",
-                     (get_time() - start_time) * 1000);
-       }
-    }
     return g.get_assembly(final_assembly_size);
  }
  
- extern "C" bool
- brw_fs_precompile(struct gl_context *ctx,
-                   struct gl_shader_program *shader_prog,
-                   struct gl_program *prog)
+ fs_reg *
+ fs_visitor::emit_cs_local_invocation_id_setup()
  {
-    struct brw_context *brw = brw_context(ctx);
-    struct brw_wm_prog_key key;
-    struct gl_fragment_program *fp = (struct gl_fragment_program *) prog;
-    struct brw_fragment_program *bfp = brw_fragment_program(fp);
-    bool program_uses_dfdy = fp->UsesDFdy;
-    memset(&key, 0, sizeof(key));
+    assert(stage == MESA_SHADER_COMPUTE);
  
-    if (brw->gen < 6) {
-       if (fp->UsesKill)
-          key.iz_lookup |= IZ_PS_KILL_ALPHATEST_BIT;
+    fs_reg *reg = new(this->mem_ctx) fs_reg(vgrf(glsl_type::uvec3_type));
  
-       if (fp->Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH))
-          key.iz_lookup |= IZ_PS_COMPUTES_DEPTH_BIT;
+    struct brw_reg src =
+       brw_vec8_grf(payload.local_invocation_id_reg, 0);
+    src = retype(src, BRW_REGISTER_TYPE_UD);
+    bld.MOV(*reg, src);
+    src.nr += dispatch_width / 8;
+    bld.MOV(offset(*reg, bld, 1), src);
+    src.nr += dispatch_width / 8;
+    bld.MOV(offset(*reg, bld, 2), src);
  
-       /* Just assume depth testing. */
-       key.iz_lookup |= IZ_DEPTH_TEST_ENABLE_BIT;
-       key.iz_lookup |= IZ_DEPTH_WRITE_ENABLE_BIT;
-    }
+    return reg;
+ }
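A note on the payload layout this relies on: each component of gl_LocalInvocationID occupies dispatch_width / 8 registers, which is why the source register number is bumped by that amount between the three MOVs. A minimal sketch of the resulting register numbers (illustrative only, with base = payload.local_invocation_id_reg):

     /* SIMD8  (dispatch_width ==  8): x -> g(base), y -> g(base + 1), z -> g(base + 2) */
     /* SIMD16 (dispatch_width == 16): x -> g(base), y -> g(base + 2), z -> g(base + 4) */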
  
-    if (brw->gen < 6 || _mesa_bitcount_64(fp->Base.InputsRead &
-                                          BRW_FS_VARYING_INPUT_MASK) > 16)
-       key.input_slots_valid = fp->Base.InputsRead | VARYING_BIT_POS;
+ fs_reg *
+ fs_visitor::emit_cs_work_group_id_setup()
+ {
+    assert(stage == MESA_SHADER_COMPUTE);
  
-    brw_setup_tex_for_precompile(brw, &key.tex, &fp->Base);
+    fs_reg *reg = new(this->mem_ctx) fs_reg(vgrf(glsl_type::uvec3_type));
  
-    if (fp->Base.InputsRead & VARYING_BIT_POS) {
-       key.drawable_height = ctx->DrawBuffer->Height;
-    }
+    struct brw_reg r0_1(retype(brw_vec1_grf(0, 1), BRW_REGISTER_TYPE_UD));
+    struct brw_reg r0_6(retype(brw_vec1_grf(0, 6), BRW_REGISTER_TYPE_UD));
+    struct brw_reg r0_7(retype(brw_vec1_grf(0, 7), BRW_REGISTER_TYPE_UD));
  
-    key.nr_color_regions = _mesa_bitcount_64(fp->Base.OutputsWritten &
-          ~(BITFIELD64_BIT(FRAG_RESULT_DEPTH) |
-          BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK)));
+    bld.MOV(*reg, r0_1);
+    bld.MOV(offset(*reg, bld, 1), r0_6);
+    bld.MOV(offset(*reg, bld, 2), r0_7);
  
-    if ((fp->Base.InputsRead & VARYING_BIT_POS) || program_uses_dfdy) {
-       key.render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer) ||
-                           key.nr_color_regions > 1;
-    }
+    return reg;
+ }
  
-    key.program_string_id = bfp->id;
+ const unsigned *
+ brw_cs_emit(struct brw_context *brw,
+             void *mem_ctx,
+             const struct brw_cs_prog_key *key,
+             struct brw_cs_prog_data *prog_data,
+             struct gl_compute_program *cp,
+             struct gl_shader_program *prog,
+             unsigned *final_assembly_size)
+ {
+    struct brw_shader *shader =
+       (struct brw_shader *) prog->_LinkedShaders[MESA_SHADER_COMPUTE];
  
-    uint32_t old_prog_offset = brw->wm.base.prog_offset;
-    struct brw_wm_prog_data *old_prog_data = brw->wm.prog_data;
+    if (unlikely(INTEL_DEBUG & DEBUG_CS))
+       brw_dump_ir("compute", prog, &shader->base, &cp->Base);
  
-    bool success = brw_codegen_wm_prog(brw, shader_prog, bfp, &key);
+    prog_data->local_size[0] = cp->LocalSize[0];
+    prog_data->local_size[1] = cp->LocalSize[1];
+    prog_data->local_size[2] = cp->LocalSize[2];
+    unsigned local_workgroup_size =
+       cp->LocalSize[0] * cp->LocalSize[1] * cp->LocalSize[2];
  
-    brw->wm.base.prog_offset = old_prog_offset;
-    brw->wm.prog_data = old_prog_data;
+    cfg_t *cfg = NULL;
+    const char *fail_msg = NULL;
  
-    return success;
- }
+    int st_index = -1;
+    if (INTEL_DEBUG & DEBUG_SHADER_TIME)
+       st_index = brw_get_shader_time_index(brw, prog, &cp->Base, ST_CS);
  
- void
- brw_setup_tex_for_precompile(struct brw_context *brw,
-                              struct brw_sampler_prog_key_data *tex,
-                              struct gl_program *prog)
- {
-    const bool has_shader_channel_select = brw->is_haswell || brw->gen >= 8;
-    unsigned sampler_count = _mesa_fls(prog->SamplersUsed);
-    for (unsigned i = 0; i < sampler_count; i++) {
-       if (!has_shader_channel_select && (prog->ShadowSamplers & (1 << i))) {
-          /* Assume DEPTH_TEXTURE_MODE is the default: X, X, X, 1 */
-          tex->swizzles[i] =
-             MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_ONE);
+    /* Now the main event: Visit the shader IR and generate our CS IR for it.
+     */
+    fs_visitor v8(brw->intelScreen->compiler, brw,
+                  mem_ctx, MESA_SHADER_COMPUTE, key, &prog_data->base, prog,
+                  &cp->Base, 8, st_index);
+    if (!v8.run_cs()) {
+       fail_msg = v8.fail_msg;
+    } else if (local_workgroup_size <= 8 * brw->max_cs_threads) {
+       cfg = v8.cfg;
+       prog_data->simd_size = 8;
+    }
+    fs_visitor v16(brw->intelScreen->compiler, brw,
+                   mem_ctx, MESA_SHADER_COMPUTE, key, &prog_data->base, prog,
+                   &cp->Base, 16, st_index);
+    if (likely(!(INTEL_DEBUG & DEBUG_NO16)) &&
+        !fail_msg && !v8.simd16_unsupported &&
+        local_workgroup_size <= 16 * brw->max_cs_threads) {
+       /* Try a SIMD16 compile */
+       v16.import_uniforms(&v8);
+       if (!v16.run_cs()) {
+          perf_debug("SIMD16 shader failed to compile: %s", v16.fail_msg);
+          if (!cfg) {
+             fail_msg =
+                "Couldn't generate SIMD16 program and not "
+                "enough threads for SIMD8";
+          }
        } else {
-          /* Color sampler: assume no swizzling. */
-          tex->swizzles[i] = SWIZZLE_XYZW;
+          cfg = v16.cfg;
+          prog_data->simd_size = 16;
        }
     }
+    if (unlikely(cfg == NULL)) {
+       assert(fail_msg);
+       prog->LinkStatus = false;
+       ralloc_strcat(&prog->InfoLog, fail_msg);
+       _mesa_problem(NULL, "Failed to compile compute shader: %s\n",
+                     fail_msg);
+       return NULL;
+    }
+    fs_generator g(brw->intelScreen->compiler, brw,
+                   mem_ctx, (void*) key, &prog_data->base, &cp->Base,
+                   v8.promoted_constants, v8.runtime_check_aads_emit, "CS");
+    if (INTEL_DEBUG & DEBUG_CS) {
+       char *name = ralloc_asprintf(mem_ctx, "%s compute shader %d",
+                                    prog->Label ? prog->Label : "unnamed",
+                                    prog->Name);
+       g.enable_debug(name);
+    }
+    g.generate_code(cfg, prog_data->simd_size);
+    return g.get_assembly(final_assembly_size);
  }
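The SIMD width selected above is gated on whether one full workgroup fits in the available hardware threads: a SIMD-N binary is only usable when local_workgroup_size <= N * max_cs_threads. A quick worked example, assuming a hypothetical device with max_cs_threads = 64:

     /* 16x16x1 workgroup = 256 invocations:                            */
     /*   SIMD8:  256 <= 8 * 64  (512)   -> usable                      */
     /*   SIMD16: 256 <= 16 * 64 (1024)  -> usable, used if it compiles */
     /* 32x32x1 workgroup = 1024 invocations:                           */
     /*   SIMD8:  1024 > 512             -> not enough threads          */
     /*   SIMD16: 1024 <= 1024           -> the only viable width       */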
index da8d47f1c5ec7d5599ddc8898bc5d2770c650b5d,7a965cd5b73328448f900a465f4759f1d1172991..61d0c896b8ea50e9b77076fdf7140ff713106568
  #include "brw_fs.h"
  #include "brw_fs_surface_builder.h"
  #include "brw_nir.h"
  
  using namespace brw;
+ using namespace brw::surface_access;
  
  void
  fs_visitor::emit_nir_code()
@@@ -42,8 -44,7 +44,8 @@@
      */
     nir_setup_inputs(nir);
     nir_setup_outputs(nir);
 -   nir_setup_uniforms(nir);
 +   uniforms = nir->num_uniforms;
 +   //nir_setup_uniforms(nir);
     nir_emit_system_values(nir);
  
     /* get the main function and emit it */
@@@ -338,6 -339,20 +340,20 @@@ emit_system_values_block(nir_block *blo
                                   BRW_REGISTER_TYPE_D));
           break;
  
+       case nir_intrinsic_load_local_invocation_id:
+          assert(v->stage == MESA_SHADER_COMPUTE);
+          reg = &v->nir_system_values[SYSTEM_VALUE_LOCAL_INVOCATION_ID];
+          if (reg->file == BAD_FILE)
+             *reg = *v->emit_cs_local_invocation_id_setup();
+          break;
+       case nir_intrinsic_load_work_group_id:
+          assert(v->stage == MESA_SHADER_COMPUTE);
+          reg = &v->nir_system_values[SYSTEM_VALUE_WORK_GROUP_ID];
+          if (reg->file == BAD_FILE)
+             *reg = *v->emit_cs_work_group_id_setup();
+          break;
        default:
           break;
        }
@@@ -1437,6 -1452,11 +1453,11 @@@ fs_visitor::nir_emit_intrinsic(const fs
        break;
     }
  
+    case nir_intrinsic_image_samples:
+       /* The driver does not support multi-sampled images. */
+       bld.MOV(retype(dest, BRW_REGISTER_TYPE_D), fs_reg(1));
+       break;
     case nir_intrinsic_load_front_face:
        bld.MOV(retype(dest, BRW_REGISTER_TYPE_D),
                *emit_frontfacing_interpolation());
     case nir_intrinsic_load_vertex_id:
        unreachable("should be lowered by lower_vertex_id()");
  
-    case nir_intrinsic_load_vertex_id_zero_base: {
-       fs_reg vertex_id = nir_system_values[SYSTEM_VALUE_VERTEX_ID_ZERO_BASE];
-       assert(vertex_id.file != BAD_FILE);
-       dest.type = vertex_id.type;
-       bld.MOV(dest, vertex_id);
-       break;
-    }
-    case nir_intrinsic_load_base_vertex: {
-       fs_reg base_vertex = nir_system_values[SYSTEM_VALUE_BASE_VERTEX];
-       assert(base_vertex.file != BAD_FILE);
-       dest.type = base_vertex.type;
-       bld.MOV(dest, base_vertex);
-       break;
-    }
-    case nir_intrinsic_load_instance_id: {
-       fs_reg instance_id = nir_system_values[SYSTEM_VALUE_INSTANCE_ID];
-       assert(instance_id.file != BAD_FILE);
-       dest.type = instance_id.type;
-       bld.MOV(dest, instance_id);
-       break;
-    }
-    case nir_intrinsic_load_sample_mask_in: {
-       fs_reg sample_mask_in = nir_system_values[SYSTEM_VALUE_SAMPLE_MASK_IN];
-       assert(sample_mask_in.file != BAD_FILE);
-       dest.type = sample_mask_in.type;
-       bld.MOV(dest, sample_mask_in);
+    case nir_intrinsic_load_vertex_id_zero_base:
+    case nir_intrinsic_load_base_vertex:
+    case nir_intrinsic_load_instance_id:
+    case nir_intrinsic_load_sample_mask_in:
+    case nir_intrinsic_load_sample_id: {
+       gl_system_value sv = nir_system_value_from_intrinsic(instr->intrinsic);
+       fs_reg val = nir_system_values[sv];
+       assert(val.file != BAD_FILE);
+       dest.type = val.type;
+       bld.MOV(dest, val);
        break;
     }
  
        break;
     }
  
-    case nir_intrinsic_load_sample_id: {
-       fs_reg sample_id = nir_system_values[SYSTEM_VALUE_SAMPLE_ID];
-       assert(sample_id.file != BAD_FILE);
-       dest.type = sample_id.type;
-       bld.MOV(dest, sample_id);
-       break;
-    }
     case nir_intrinsic_load_uniform_indirect:
        has_indirect = true;
        /* fallthrough */
        has_indirect = true;
        /* fallthrough */
     case nir_intrinsic_load_ubo: {
 +      uint32_t set = instr->const_index[0];
        nir_const_value *const_index = nir_src_as_const_value(instr->src[0]);
        fs_reg surf_index;
  
        if (const_index) {
 -         surf_index = fs_reg(stage_prog_data->binding_table.ubo_start +
 -                             const_index->u[0]);
 +         uint32_t binding = const_index->u[0];
 +
 +         /* FIXME: We should probably assert here, but dota2 seems to hit
 +          * it and we'd like to keep going.
 +          */
 +         if (binding >= stage_prog_data->bind_map[set].index_count)
 +            binding = 0;
 +
 +         surf_index = fs_reg(stage_prog_data->bind_map[set].index[binding]);
        } else {
 +         assert(0 && "need more info from the ir for this.");
           /* The block index is not a constant. Evaluate the index expression
            * per-channel and add the base UBO index; we have to select a value
            * from any live channel.
            */
           brw_mark_surface_used(prog_data,
                                 stage_prog_data->binding_table.ubo_start +
-                                shader_prog->NumUniformBlocks - 1);
+                                shader_prog->NumBufferInterfaceBlocks - 1);
        }
  
        if (has_indirect) {
                                       BRW_REGISTER_TYPE_D),
                   fs_reg(2));
  
 -         unsigned vec4_offset = instr->const_index[0] / 4;
 +         unsigned vec4_offset = instr->const_index[1] / 4;
           for (int i = 0; i < instr->num_components; i++)
              VARYING_PULL_CONSTANT_LOAD(bld, offset(dest, bld, i), surf_index,
                                         base_offset, vec4_offset + i);
           fs_reg packed_consts = vgrf(glsl_type::float_type);
           packed_consts.type = dest.type;
  
 -         fs_reg const_offset_reg((unsigned) instr->const_index[0] & ~15);
 +         fs_reg const_offset_reg((unsigned) instr->const_index[1] & ~15);
           bld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, packed_consts,
                    surf_index, const_offset_reg);
  
        break;
     }
  
+    case nir_intrinsic_load_ssbo_indirect:
+       has_indirect = true;
+       /* fallthrough */
+    case nir_intrinsic_load_ssbo: {
+       assert(devinfo->gen >= 7);
+       nir_const_value *const_uniform_block =
+          nir_src_as_const_value(instr->src[0]);
+       fs_reg surf_index;
+       if (const_uniform_block) {
+          unsigned index = stage_prog_data->binding_table.ubo_start +
+                           const_uniform_block->u[0];
+          surf_index = fs_reg(index);
+          brw_mark_surface_used(prog_data, index);
+       } else {
+          surf_index = vgrf(glsl_type::uint_type);
+          bld.ADD(surf_index, get_nir_src(instr->src[0]),
+                  fs_reg(stage_prog_data->binding_table.ubo_start));
+          surf_index = bld.emit_uniformize(surf_index);
+          /* Assume this may touch any UBO. It would be nice to provide
+           * a tighter bound, but the array information is already lowered away.
+           */
+          brw_mark_surface_used(prog_data,
+                                stage_prog_data->binding_table.ubo_start +
+                                shader_prog->NumBufferInterfaceBlocks - 1);
+       }
+       /* Get the offset to read from */
+       fs_reg offset_reg = vgrf(glsl_type::uint_type);
+       unsigned const_offset_bytes = 0;
+       if (has_indirect) {
+          bld.MOV(offset_reg, get_nir_src(instr->src[1]));
+       } else {
+          const_offset_bytes = instr->const_index[0];
+          bld.MOV(offset_reg, fs_reg(const_offset_bytes));
+       }
+       /* Read the vector */
+       for (int i = 0; i < instr->num_components; i++) {
+          fs_reg read_result = emit_untyped_read(bld, surf_index, offset_reg,
+                                                 1 /* dims */, 1 /* size */,
+                                                 BRW_PREDICATE_NONE);
+          read_result.type = dest.type;
+          bld.MOV(dest, read_result);
+          dest = offset(dest, bld, 1);
+          /* Vector components are stored contiguously in memory */
+          if (i < instr->num_components) {
+             if (!has_indirect) {
+                const_offset_bytes += 4;
+                bld.MOV(offset_reg, fs_reg(const_offset_bytes));
+             } else {
+                bld.ADD(offset_reg, offset_reg, brw_imm_ud(4));
+             }
+          }
+       }
+       break;
+    }
     case nir_intrinsic_load_input_indirect:
        has_indirect = true;
        /* fallthrough */
        break;
     }
  
+    case nir_intrinsic_store_ssbo_indirect:
+       has_indirect = true;
+       /* fallthrough */
+    case nir_intrinsic_store_ssbo: {
+       assert(devinfo->gen >= 7);
+       /* Block index */
+       fs_reg surf_index;
+       nir_const_value *const_uniform_block =
+          nir_src_as_const_value(instr->src[1]);
+       if (const_uniform_block) {
+          unsigned index = stage_prog_data->binding_table.ubo_start +
+                           const_uniform_block->u[0];
+          surf_index = fs_reg(index);
+          brw_mark_surface_used(prog_data, index);
+       } else {
+          surf_index = vgrf(glsl_type::uint_type);
+          bld.ADD(surf_index, get_nir_src(instr->src[1]),
+                   fs_reg(stage_prog_data->binding_table.ubo_start));
+          surf_index = bld.emit_uniformize(surf_index);
+          brw_mark_surface_used(prog_data,
+                                stage_prog_data->binding_table.ubo_start +
+                                shader_prog->NumBufferInterfaceBlocks - 1);
+       }
+       /* Offset */
+       fs_reg offset_reg = vgrf(glsl_type::uint_type);
+       unsigned const_offset_bytes = 0;
+       if (has_indirect) {
+          bld.MOV(offset_reg, get_nir_src(instr->src[2]));
+       } else {
+          const_offset_bytes = instr->const_index[0];
+          bld.MOV(offset_reg, fs_reg(const_offset_bytes));
+       }
+       /* Value */
+       fs_reg val_reg = get_nir_src(instr->src[0]);
+       /* Writemask */
+       unsigned writemask = instr->const_index[1];
+       /* Write each component present in the writemask */
+       unsigned skipped_channels = 0;
+       for (int i = 0; i < instr->num_components; i++) {
+          int component_mask = 1 << i;
+          if (writemask & component_mask) {
+             if (skipped_channels) {
+                if (!has_indirect) {
+                   const_offset_bytes += 4 * skipped_channels;
+                   bld.MOV(offset_reg, fs_reg(const_offset_bytes));
+                } else {
+                   bld.ADD(offset_reg, offset_reg,
+                            brw_imm_ud(4 * skipped_channels));
+                }
+                skipped_channels = 0;
+             }
+             emit_untyped_write(bld, surf_index, offset_reg,
+                                offset(val_reg, bld, i),
+                                1 /* dims */, 1 /* size */,
+                                BRW_PREDICATE_NONE);
+          }
+          skipped_channels++;
+       }
+       break;
+    }
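    /* Worked example for the store loop above (illustration only): with a
     * vec4 value and writemask .xyw, one untyped write is emitted per
     * enabled channel.  skipped_channels counts channel slots since the
     * last write, so offset_reg advances by 4 bytes before y and by 8
     * bytes (skipping the disabled z) before w; x, y and w land at byte
     * offsets 0, 4 and 12 respectively. */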
     case nir_intrinsic_store_output_indirect:
        has_indirect = true;
        /* fallthrough */
  
     case nir_intrinsic_barrier:
        emit_barrier();
+       if (stage == MESA_SHADER_COMPUTE)
+          ((struct brw_cs_prog_data *) prog_data)->uses_barrier = true;
+       break;
+    case nir_intrinsic_load_local_invocation_id:
+    case nir_intrinsic_load_work_group_id: {
+       gl_system_value sv = nir_system_value_from_intrinsic(instr->intrinsic);
+       fs_reg val = nir_system_values[sv];
+       assert(val.file != BAD_FILE);
+       dest.type = val.type;
+       for (unsigned i = 0; i < 3; i++)
+          bld.MOV(offset(dest, bld, i), offset(val, bld, i));
+       break;
+    }
+    case nir_intrinsic_ssbo_atomic_add:
+       nir_emit_ssbo_atomic(bld, BRW_AOP_ADD, instr);
+       break;
+    case nir_intrinsic_ssbo_atomic_min:
+       if (dest.type == BRW_REGISTER_TYPE_D)
+          nir_emit_ssbo_atomic(bld, BRW_AOP_IMIN, instr);
+       else
+          nir_emit_ssbo_atomic(bld, BRW_AOP_UMIN, instr);
+       break;
+    case nir_intrinsic_ssbo_atomic_max:
+       if (dest.type == BRW_REGISTER_TYPE_D)
+          nir_emit_ssbo_atomic(bld, BRW_AOP_IMAX, instr);
+       else
+          nir_emit_ssbo_atomic(bld, BRW_AOP_UMAX, instr);
+       break;
+    case nir_intrinsic_ssbo_atomic_and:
+       nir_emit_ssbo_atomic(bld, BRW_AOP_AND, instr);
+       break;
+    case nir_intrinsic_ssbo_atomic_or:
+       nir_emit_ssbo_atomic(bld, BRW_AOP_OR, instr);
+       break;
+    case nir_intrinsic_ssbo_atomic_xor:
+       nir_emit_ssbo_atomic(bld, BRW_AOP_XOR, instr);
        break;
+    case nir_intrinsic_ssbo_atomic_exchange:
+       nir_emit_ssbo_atomic(bld, BRW_AOP_MOV, instr);
+       break;
+    case nir_intrinsic_ssbo_atomic_comp_swap:
+       nir_emit_ssbo_atomic(bld, BRW_AOP_CMPWR, instr);
+       break;
+    case nir_intrinsic_get_buffer_size: {
+       nir_const_value *const_uniform_block = nir_src_as_const_value(instr->src[0]);
+       unsigned ubo_index = const_uniform_block ? const_uniform_block->u[0] : 0;
+       int reg_width = dispatch_width / 8;
+       assert(shader->base.UniformBlocks[ubo_index].IsShaderStorage);
+       /* Set LOD = 0 */
+       fs_reg source = fs_reg(0);
+       int mlen = 1 * reg_width;
+       fs_reg src_payload = fs_reg(GRF, alloc.allocate(mlen),
+                                   BRW_REGISTER_TYPE_UD);
+       bld.LOAD_PAYLOAD(src_payload, &source, 1, 0);
+       fs_reg surf_index = fs_reg(prog_data->binding_table.ubo_start + ubo_index);
+       fs_inst *inst = bld.emit(FS_OPCODE_GET_BUFFER_SIZE, dest,
+                                src_payload, surf_index);
+       inst->header_size = 0;
+       inst->mlen = mlen;
+       bld.emit(inst);
+       break;
+    }
+    case nir_intrinsic_load_num_work_groups: {
+       assert(devinfo->gen >= 7);
+       assert(stage == MESA_SHADER_COMPUTE);
+       struct brw_cs_prog_data *cs_prog_data =
+          (struct brw_cs_prog_data *) prog_data;
+       const unsigned surface =
+          cs_prog_data->binding_table.work_groups_start;
+       cs_prog_data->uses_num_work_groups = true;
+       fs_reg surf_index = fs_reg(surface);
+       brw_mark_surface_used(prog_data, surface);
+       /* Read the 3 GLuint components of gl_NumWorkGroups */
+       for (unsigned i = 0; i < 3; i++) {
+          fs_reg read_result =
+             emit_untyped_read(bld, surf_index,
+                               fs_reg(i << 2),
+                               1 /* dims */, 1 /* size */,
+                               BRW_PREDICATE_NONE);
+          read_result.type = dest.type;
+          bld.MOV(dest, read_result);
+          dest = offset(dest, bld, 1);
+       }
+       break;
+    }
  
     default:
        unreachable("unknown intrinsic");
     }
  }
  
+ void
+ fs_visitor::nir_emit_ssbo_atomic(const fs_builder &bld,
+                                  int op, nir_intrinsic_instr *instr)
+ {
+    fs_reg dest;
+    if (nir_intrinsic_infos[instr->intrinsic].has_dest)
+       dest = get_nir_dest(instr->dest);
+    fs_reg surface;
+    nir_const_value *const_surface = nir_src_as_const_value(instr->src[0]);
+    if (const_surface) {
+       unsigned surf_index = stage_prog_data->binding_table.ubo_start +
+                             const_surface->u[0];
+       surface = fs_reg(surf_index);
+       brw_mark_surface_used(prog_data, surf_index);
+    } else {
+       surface = vgrf(glsl_type::uint_type);
+       bld.ADD(surface, get_nir_src(instr->src[0]),
+               fs_reg(stage_prog_data->binding_table.ubo_start));
+       /* Assume this may touch any UBO. This is the same as we do for other
+        * UBO/SSBO accesses with non-constant surface.
+        */
+       brw_mark_surface_used(prog_data,
+                             stage_prog_data->binding_table.ubo_start +
+                             shader_prog->NumBufferInterfaceBlocks - 1);
+    }
+    fs_reg offset = get_nir_src(instr->src[1]);
+    fs_reg data1 = get_nir_src(instr->src[2]);
+    fs_reg data2;
+    if (op == BRW_AOP_CMPWR)
+       data2 = get_nir_src(instr->src[3]);
+    /* Emit the actual atomic operation */
+    fs_reg atomic_result =
+       surface_access::emit_untyped_atomic(bld, surface, offset,
+                                           data1, data2,
+                                           1 /* dims */, 1 /* rsize */,
+                                           op,
+                                           BRW_PREDICATE_NONE);
+    dest.type = atomic_result.type;
+    bld.MOV(dest, atomic_result);
+ }
  void
  fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr)
  {
 -   unsigned sampler = instr->sampler_index;
 +   uint32_t set = instr->sampler_set;
 +   uint32_t binding = instr->sampler_index;
 +
 +   assert(binding < stage_prog_data->bind_map[set].index_count);
 +   assert(stage_prog_data->bind_map[set].index[binding] < 1000);
 +
 +   unsigned sampler = stage_prog_data->bind_map[set].index[binding];
     fs_reg sampler_reg(sampler);
  
     /* FINISHME: We're failing to recompile our programs when the sampler is
     case nir_texop_txf_ms: op = ir_txf_ms; break;
     case nir_texop_txl: op = ir_txl; break;
     case nir_texop_txs: op = ir_txs; break;
+    case nir_texop_texture_samples: {
+       fs_reg dst = retype(get_nir_dest(instr->dest), BRW_REGISTER_TYPE_D);
+       fs_inst *inst = bld.emit(SHADER_OPCODE_SAMPLEINFO, dst,
+                                bld.vgrf(BRW_REGISTER_TYPE_D, 1),
+                                sampler_reg);
+       inst->mlen = 1;
+       inst->header_size = 1;
+       inst->base_mrf = -1;
+       return;
+    }
     default:
        unreachable("unknown texture opcode");
     }
@@@ -1913,12 -2174,6 +2190,12 @@@ fs_visitor::nir_emit_jump(const fs_buil
        bld.emit(BRW_OPCODE_CONTINUE);
        break;
     case nir_jump_return:
 +      /* This has to be the last block in the shader.  We don't handle
 +       * early returns.
 +       */
 +      assert(nir_cf_node_next(&instr->instr.block->cf_node) == NULL &&
 +             instr->instr.block->cf_node.parent->type == nir_cf_node_function);
 +      break;
     default:
        unreachable("unknown jump");
     }
index 4ad65215756986526d1bc82dbc387584b6fabd4e,0de36cc9af15084945f37511f8d5f3deeba7d9fd..0119a906e9ea1de3c379107bce595b2700443f64
  #include "brw_state.h"
  #include "brw_ff_gs.h"
  
 -
  bool
 -brw_codegen_gs_prog(struct brw_context *brw,
 +brw_compile_gs_prog(struct brw_context *brw,
                      struct gl_shader_program *prog,
                      struct brw_geometry_program *gp,
 -                    struct brw_gs_prog_key *key)
 +                    struct brw_gs_prog_key *key,
 +                    struct brw_gs_compile_output *output)
  {
 -   struct brw_stage_state *stage_state = &brw->gs.base;
     struct brw_gs_compile c;
     memset(&c, 0, sizeof(c));
     c.key = *key;
     c.gp = gp;
  
 +   /* We get the bind map as input in the output struct...*/
 +   c.prog_data.base.base.map_entries = output->prog_data.base.base.map_entries;
 +   memcpy(c.prog_data.base.base.bind_map, output->prog_data.base.base.bind_map,
 +          sizeof(c.prog_data.base.base.bind_map));
 +
     c.prog_data.include_primitive_id =
        (gp->program.Base.InputsRead & VARYING_BIT_PRIMITIVE_ID) != 0;
  
@@@ -66,8 -62,6 +66,6 @@@
     struct gl_shader *gs = prog->_LinkedShaders[MESA_SHADER_GEOMETRY];
     int param_count = gs->num_uniform_components * 4;
  
-    /* We also upload clip plane data as uniforms */
-    param_count += MAX_CLIP_PLANES * 4;
     param_count += gs->NumImages * BRW_IMAGE_PARAM_SIZE;
  
     c.prog_data.base.base.param =
     c.prog_data.base.base.nr_params = param_count;
     c.prog_data.base.base.nr_image_params = gs->NumImages;
  
+    if (brw->gen >= 8) {
+       c.prog_data.static_vertex_count = !gp->program.Base.nir ? -1 :
+          nir_gs_count_vertices(gp->program.Base.nir);
+    }
     if (brw->gen >= 7) {
        if (gp->program.OutputType == GL_POINTS) {
           /* When the output type is points, the geometry shader may output data
  
     GLbitfield64 outputs_written = gp->program.Base.OutputsWritten;
  
-    /* In order for legacy clipping to work, we need to populate the clip
-     * distance varying slots whenever clipping is enabled, even if the vertex
-     * shader doesn't write to gl_ClipDistance.
-     */
-    if (c.key.base.userclip_active) {
-       outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0);
-       outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST1);
-    }
     brw_compute_vue_map(brw->intelScreen->devinfo,
-                        &c.prog_data.base.vue_map, outputs_written);
+                        &c.prog_data.base.vue_map, outputs_written,
+                        prog ? prog->SeparateShader : false);
  
     /* Compute the output vertex size.
      *
     c.prog_data.output_topology =
        get_hw_prim_for_gl_prim(gp->program.OutputType);
  
+    /* The GLSL linker will have already matched up GS inputs and the outputs
+     * of prior stages.  The driver does extend VS outputs in some cases, but
+     * only for legacy OpenGL or Gen4-5 hardware, neither of which offer
+     * geometry shader support.  So we can safely ignore that.
+     *
+     * For SSO pipelines, we use a fixed VUE map layout based on variable
+     * locations, so we can rely on rendezvous-by-location making this work.
+     *
+     * However, we need to ignore VARYING_SLOT_PRIMITIVE_ID, as it's not
+     * written by previous stages and shows up via payload magic.
+     */
+    GLbitfield64 inputs_read =
+       gp->program.Base.InputsRead & ~VARYING_BIT_PRIMITIVE_ID;
     brw_compute_vue_map(brw->intelScreen->devinfo,
-                        &c.input_vue_map, c.key.input_varyings);
+                        &c.input_vue_map, inputs_read,
+                        prog->SeparateShader);
  
     /* GS inputs are read from the VUE 256 bits (2 vec4's) at a time, so we
      * need to program a URB read length of ceiling(num_slots / 2).
        return false;
     }
  
 -   /* Scratch space is used for register spilling */
 -   if (c.prog_data.base.base.total_scratch) {
 +   output->mem_ctx = mem_ctx;
 +   output->program = program;
 +   output->program_size = program_size;
 +   memcpy(&output->prog_data, &c.prog_data,
 +          sizeof(output->prog_data));
 +
 +   return true;
 +}
 +
 +bool
 +brw_codegen_gs_prog(struct brw_context *brw,
 +                    struct gl_shader_program *prog,
 +                    struct brw_geometry_program *gp,
 +                    struct brw_gs_prog_key *key)
 +{
 +   struct brw_gs_compile_output output;
 +   struct brw_stage_state *stage_state = &brw->gs.base;
 +
 +   if (!brw_compile_gs_prog(brw, prog, gp, key, &output))
 +      return false;
 +
 +   if (output.prog_data.base.base.total_scratch) {
        brw_get_scratch_bo(brw, &stage_state->scratch_bo,
 -                       c.prog_data.base.base.total_scratch *
 +                       output.prog_data.base.base.total_scratch *
                           brw->max_gs_threads);
     }
  
     brw_upload_cache(&brw->cache, BRW_CACHE_GS_PROG,
 -                    &c.key, sizeof(c.key),
 -                    program, program_size,
 -                    &c.prog_data, sizeof(c.prog_data),
 +                    key, sizeof(*key),
 +                    output.program, output.program_size,
 +                    &output.prog_data, sizeof(output.prog_data),
                      &stage_state->prog_offset, &brw->gs.prog_data);
 -   ralloc_free(mem_ctx);
 +   ralloc_free(output.mem_ctx);
  
     return true;
  }
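With the compile step split out, brw_compile_gs_prog fills a brw_gs_compile_output and returns true on success, so a caller that manages its own program storage can reuse it without the GL cache upload. A minimal sketch of such a caller (hypothetical, mirroring what brw_codegen_gs_prog does above):

     struct brw_gs_compile_output output;
     if (brw_compile_gs_prog(brw, prog, gp, &key, &output)) {
        /* copy output.program / output.program_size and output.prog_data
         * into caller-owned storage, then release the compile memory */
        ralloc_free(output.mem_ctx);
     }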
@@@ -317,8 -302,7 +326,7 @@@ brw_gs_state_dirty(struct brw_context *
     return brw_state_dirty(brw,
                            _NEW_TEXTURE,
                            BRW_NEW_GEOMETRY_PROGRAM |
-                           BRW_NEW_TRANSFORM_FEEDBACK |
-                           BRW_NEW_VUE_MAP_VS);
+                           BRW_NEW_TRANSFORM_FEEDBACK);
  }
  
  static void
@@@ -333,16 -317,11 +341,11 @@@ brw_gs_populate_key(struct brw_context 
  
     memset(key, 0, sizeof(*key));
  
-    key->base.program_string_id = gp->id;
-    brw_setup_vue_key_clip_info(brw, &key->base,
-                                gp->program.Base.UsesClipDistanceOut);
+    key->program_string_id = gp->id;
  
     /* _NEW_TEXTURE */
     brw_populate_sampler_prog_key_data(ctx, prog, stage_state->sampler_count,
-                                       &key->base.tex);
-    /* BRW_NEW_VUE_MAP_VS */
-    key->input_varyings = brw->vue_map_vs.slots_valid;
+                                       &key->tex);
  }
  
  void
@@@ -361,11 -340,6 +364,6 @@@ brw_upload_gs_prog(struct brw_context *
  
     if (gp == NULL) {
        /* No geometry shader.  Vertex data just passes straight through. */
-       if (brw->ctx.NewDriverState & BRW_NEW_VUE_MAP_VS) {
-          brw->vue_map_geom_out = brw->vue_map_vs;
-          brw->ctx.NewDriverState |= BRW_NEW_VUE_MAP_GEOM_OUT;
-       }
        if (brw->gen == 6 &&
            (brw->ctx.NewDriverState & BRW_NEW_TRANSFORM_FEEDBACK)) {
           gen6_brw_upload_ff_gs_prog(brw);
        (void)success;
     }
     brw->gs.base.prog_data = &brw->gs.prog_data->base.base;
-    if (memcmp(&brw->gs.prog_data->base.vue_map, &brw->vue_map_geom_out,
-               sizeof(brw->vue_map_geom_out)) != 0) {
-       brw->vue_map_geom_out = brw->gs.prog_data->base.vue_map;
-       brw->ctx.NewDriverState |= BRW_NEW_VUE_MAP_GEOM_OUT;
-    }
  }
  
  bool
@@@ -416,12 -384,8 +408,8 @@@ brw_gs_precompile(struct gl_context *ct
  
     memset(&key, 0, sizeof(key));
  
-    brw_vue_setup_prog_key_for_precompile(ctx, &key.base, bgp->id, &gp->Base);
-    /* Assume that the set of varyings coming in from the vertex shader exactly
-     * matches what the geometry shader requires.
-     */
-    key.input_varyings = gp->Base.InputsRead;
+    brw_setup_tex_for_precompile(brw, &key.tex, prog);
+    key.program_string_id = bgp->id;
  
     success = brw_codegen_gs_prog(brw, shader_prog, bgp, &key);
  
index 4c8602a108572265cb1eacedd247599ecaa2b0f8,1d4f6ab2ccd5e257a4c11889f1178d92db1c96c9..35855092dfff4f8cc4f69bd4a96d527d588f4b9f
@@@ -61,6 -61,8 +61,8 @@@ nir_optimize(nir_shader *nir, bool is_s
        nir_validate_shader(nir);
        progress |= nir_opt_constant_folding(nir);
        nir_validate_shader(nir);
+       progress |= nir_opt_dead_cf(nir);
+       nir_validate_shader(nir);
        progress |= nir_opt_remove_phis(nir);
        nir_validate_shader(nir);
        progress |= nir_opt_undef(nir);
@@@ -78,7 -80,11 +80,7 @@@ brw_create_nir(struct brw_context *brw
     struct gl_context *ctx = &brw->ctx;
     const nir_shader_compiler_options *options =
        ctx->Const.ShaderCompilerOptions[stage].NirOptions;
 -   static const nir_lower_tex_options tex_options = {
 -      .lower_txp = ~0,
 -   };
     struct gl_shader *shader = shader_prog ? shader_prog->_LinkedShaders[stage] : NULL;
 -   bool debug_enabled = INTEL_DEBUG & intel_debug_flag_for_shader_stage(stage);
     nir_shader *nir;
  
     /* First, lower the GLSL IR or Mesa IR to NIR */
     }
     nir_validate_shader(nir);
  
 +   brw_process_nir(nir, brw->intelScreen->devinfo, shader_prog, stage, is_scalar);
 +
 +   static GLuint msg_id = 0;
 +   _mesa_gl_debug(&brw->ctx, &msg_id,
 +                  MESA_DEBUG_SOURCE_SHADER_COMPILER,
 +                  MESA_DEBUG_TYPE_OTHER,
 +                  MESA_DEBUG_SEVERITY_NOTIFICATION,
 +                  "%s NIR shader:\n",
 +                  _mesa_shader_stage_to_abbrev(stage));
 +
 +   return nir;
 +}
 +
 +void
 +brw_process_nir(nir_shader *nir,
 +                const struct brw_device_info *devinfo,
 +                const struct gl_shader_program *shader_prog,
 +                gl_shader_stage stage, bool is_scalar)
 +{
 +   bool debug_enabled = INTEL_DEBUG & intel_debug_flag_for_shader_stage(stage);
++   static const nir_lower_tex_options tex_options = {
++      .lower_txp = ~0,
++   };
++
+    if (stage == MESA_SHADER_GEOMETRY) {
+       nir_lower_gs_intrinsics(nir);
+       nir_validate_shader(nir);
+    }
  
     nir_lower_global_vars_to_local(nir);
     nir_validate_shader(nir);
  
-    nir_lower_tex_projector(nir);
+    nir_lower_tex(nir, &tex_options);
     nir_validate_shader(nir);
  
     nir_normalize_cubemap_coords(nir);
  
     if (shader_prog) {
        nir_lower_samplers(nir, shader_prog);
 -      nir_validate_shader(nir);
 +   } else {
 +      nir_lower_samplers_for_vk(nir);
     }
 +   nir_validate_shader(nir);
  
     nir_lower_system_values(nir);
     nir_validate_shader(nir);
  
     nir_optimize(nir, is_scalar);
  
 -   if (brw->gen >= 6) {
 +   if (devinfo->gen >= 6) {
        /* Try and fuse multiply-adds */
        nir_opt_peephole_ffma(nir);
        nir_validate_shader(nir);
        nir_print_shader(nir, stderr);
     }
  
-    nir_convert_from_ssa(nir, is_scalar);
+    nir_convert_from_ssa(nir, true);
     nir_validate_shader(nir);
  
     if (!is_scalar) {
+       nir_move_vec_src_uses_to_dest(nir);
+       nir_validate_shader(nir);
        nir_lower_vec_to_movs(nir);
        nir_validate_shader(nir);
     }
      * run it last because it stashes data in instr->pass_flags and we don't
      * want that to be squashed by other NIR passes.
      */
 -   if (brw->gen <= 5)
 +   if (devinfo->gen <= 5)
        brw_nir_analyze_boolean_resolves(nir);
  
     nir_sweep(nir);
                _mesa_shader_stage_to_string(stage));
        nir_print_shader(nir, stderr);
     }
 -
 -   return nir;
  }
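Splitting this pass pipeline out of brw_create_nir means it can be run on a nir_shader that was built without a gl_shader_program or a brw_context. A minimal usage sketch, assuming a scalar fragment shader with no GL shader program attached (the shader_prog == NULL case takes the nir_lower_samplers_for_vk() path above):

     brw_process_nir(nir, brw->intelScreen->devinfo,
                     NULL /* shader_prog */, MESA_SHADER_FRAGMENT,
                     true /* is_scalar */);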
  
  enum brw_reg_type
index 5a54cd39076163cc3f8918f74c803a1b7565625d,1ac0ed273ef10d9fc2fc81a9acf0771ccb033d3f..fa59338950a55fa62da3f9b7500dd7c8054b4070
@@@ -276,7 -276,7 +276,7 @@@ brw_get_scratch_bo(struct brw_context *
  
  void brwInitFragProgFuncs( struct dd_function_table *functions )
  {
 -   assert(functions->ProgramStringNotify == _tnl_program_string);
 +   /* assert(functions->ProgramStringNotify == _tnl_program_string); */
  
     functions->NewProgram = brwNewProgram;
     functions->DeleteProgram = brwDeleteProgram;
@@@ -588,3 -588,22 +588,22 @@@ brw_dump_ir(const char *stage, struct g
        _mesa_print_program(prog);
     }
  }
+ void
+ brw_setup_tex_for_precompile(struct brw_context *brw,
+                              struct brw_sampler_prog_key_data *tex,
+                              struct gl_program *prog)
+ {
+    const bool has_shader_channel_select = brw->is_haswell || brw->gen >= 8;
+    unsigned sampler_count = _mesa_fls(prog->SamplersUsed);
+    for (unsigned i = 0; i < sampler_count; i++) {
+       if (!has_shader_channel_select && (prog->ShadowSamplers & (1 << i))) {
+          /* Assume DEPTH_TEXTURE_MODE is the default: X, X, X, 1 */
+          tex->swizzles[i] =
+             MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_ONE);
+       } else {
+          /* Color sampler: assume no swizzling. */
+          tex->swizzles[i] = SWIZZLE_XYZW;
+       }
+    }
+ }
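In effect, on hardware without shader channel select a precompiled shadow sampler is assumed to behave like the default DEPTH_TEXTURE_MODE: whatever the sampler returns in the X channel is replicated to the first three components and alpha is forced to one, while color samplers keep the identity swizzle. Illustration:

     /* shadow sampler, pre-HSW: swizzles[i] == (X, X, X, ONE)  ->  (d, d, d, 1.0) */
     /* color sampler:           swizzles[i] == XYZW            ->  unchanged      */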
index 4ef2777559f36e2a23952bdce2d0f78c04d3dbea,785cb2722feee68d0ddadc382249d555415f6e34..1060d93ae6ba328e984db9bb55bd5e54a69f5609
@@@ -96,8 -96,15 +96,17 @@@ brw_compiler_create(void *mem_ctx, cons
      */
     nir_options->lower_ffma = true;
     nir_options->lower_sub = true;
 +   nir_options->lower_fdiv = true;
 +
+    /* In the vec4 backend, our dpN instruction replicates its result to all
+     * the components of a vec4.  We would like NIR to give us replicated fdot
+     * instructions because it can optimize better for us.
+     *
+     * For the FS backend, it should be lowered away by the scalarizing pass so
+     * we should never see fdot anyway.
+     */
+    nir_options->fdot_replicates = true;
     /* We want the GLSL compiler to emit code that uses condition codes */
     for (int i = 0; i < MESA_SHADER_STAGES; i++) {
        compiler->glsl_compiler_options[i].MaxUnrollIterations = 32;
        compiler->glsl_compiler_options[i].EmitNoNoise = true;
        compiler->glsl_compiler_options[i].EmitNoMainReturn = true;
        compiler->glsl_compiler_options[i].EmitNoIndirectInput = true;
-       compiler->glsl_compiler_options[i].EmitNoIndirectOutput =
-        (i == MESA_SHADER_FRAGMENT);
-       compiler->glsl_compiler_options[i].EmitNoIndirectTemp =
-        (i == MESA_SHADER_FRAGMENT);
        compiler->glsl_compiler_options[i].EmitNoIndirectUniform = false;
        compiler->glsl_compiler_options[i].LowerClipDistance = true;
  
+       bool is_scalar;
+       switch (i) {
+       case MESA_SHADER_FRAGMENT:
+       case MESA_SHADER_COMPUTE:
+          is_scalar = true;
+          break;
+       case MESA_SHADER_VERTEX:
+          is_scalar = compiler->scalar_vs;
+          break;
+       default:
+          is_scalar = false;
+          break;
+       }
+       compiler->glsl_compiler_options[i].EmitNoIndirectOutput = is_scalar;
+       compiler->glsl_compiler_options[i].EmitNoIndirectTemp = is_scalar;
+       compiler->glsl_compiler_options[i].OptimizeForAOS = !is_scalar;
        /* !ARB_gpu_shader5 */
        if (devinfo->gen < 7)
           compiler->glsl_compiler_options[i].EmitNoIndirectSampler = true;
-    }
  
-    compiler->glsl_compiler_options[MESA_SHADER_VERTEX].OptimizeForAOS = true;
-    compiler->glsl_compiler_options[MESA_SHADER_GEOMETRY].OptimizeForAOS = true;
-    if (compiler->scalar_vs || brw_env_var_as_boolean("INTEL_USE_NIR", true)) {
-       if (compiler->scalar_vs) {
-          /* If we're using the scalar backend for vertex shaders, we need to
-           * configure these accordingly.
-           */
-          compiler->glsl_compiler_options[MESA_SHADER_VERTEX].EmitNoIndirectOutput = true;
-          compiler->glsl_compiler_options[MESA_SHADER_VERTEX].EmitNoIndirectTemp = true;
-          compiler->glsl_compiler_options[MESA_SHADER_VERTEX].OptimizeForAOS = false;
-       }
-       compiler->glsl_compiler_options[MESA_SHADER_VERTEX].NirOptions = nir_options;
-    }
-    if (brw_env_var_as_boolean("INTEL_USE_NIR", true)) {
-       compiler->glsl_compiler_options[MESA_SHADER_GEOMETRY].NirOptions = nir_options;
+       if (is_scalar || brw_env_var_as_boolean("INTEL_USE_NIR", true))
+          compiler->glsl_compiler_options[i].NirOptions = nir_options;
     }
  
-    compiler->glsl_compiler_options[MESA_SHADER_FRAGMENT].NirOptions = nir_options;
-    compiler->glsl_compiler_options[MESA_SHADER_COMPUTE].NirOptions = nir_options;
     return compiler;
  }
  
@@@ -196,6 -197,7 +199,7 @@@ is_scalar_shader_stage(struct brw_conte
  {
     switch (stage) {
     case MESA_SHADER_FRAGMENT:
+    case MESA_SHADER_COMPUTE:
        return true;
     case MESA_SHADER_VERTEX:
        return brw->intelScreen->compiler->scalar_vs;
@@@ -323,9 -325,6 +327,6 @@@ process_glsl_ir(gl_shader_stage stage
                                          options, ctx->Const.NativeIntegers) || progress;
     } while (progress);
  
-    if (options->NirOptions != NULL)
-       lower_output_reads(stage, shader->ir);
     validate_ir_tree(shader->ir);
  
     /* Now that we've finished altering the linked IR, reparent any live IR back
@@@ -460,7 -459,6 +461,7 @@@ brw_type_for_base_type(const struct gls
     case GLSL_TYPE_ERROR:
     case GLSL_TYPE_INTERFACE:
     case GLSL_TYPE_DOUBLE:
 +   case GLSL_TYPE_FUNCTION:
        unreachable("not reached");
     }
  
@@@ -623,6 -621,8 +624,8 @@@ brw_instruction_name(enum opcode op
        return "tg4_offset";
     case SHADER_OPCODE_TG4_OFFSET_LOGICAL:
        return "tg4_offset_logical";
+    case SHADER_OPCODE_SAMPLEINFO:
+       return "sampleinfo";
  
     case SHADER_OPCODE_SHADER_TIME_ADD:
        return "shader_time_add";
     case FS_OPCODE_PIXEL_Y:
        return "pixel_y";
  
+    case FS_OPCODE_GET_BUFFER_SIZE:
+       return "fs_get_buffer_size";
     case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
        return "uniform_pull_const";
     case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7:
     case VS_OPCODE_SET_SIMD4X2_HEADER_GEN9:
        return "set_simd4x2_header_gen9";
  
+    case VS_OPCODE_GET_BUFFER_SIZE:
+       return "vs_get_buffer_size";
     case VS_OPCODE_UNPACK_FLAGS_SIMD4X2:
        return "unpack_flags_simd4x2";
  
index 5e528b5c5a19c5d3cc3dd5b1ee6e2f7fdb27f7b5,c61b38548f7f10dfd754f9cc413d35c9fcef1cc5..1d62f2f6a75f609abd901cf54776086d5a7d4ef0
@@@ -61,6 -61,8 +61,8 @@@ src_reg::src_reg(register_file file, in
        this->swizzle = brw_swizzle_for_size(type->vector_elements);
     else
        this->swizzle = BRW_SWIZZLE_XYZW;
+    if (type)
+       this->type = brw_type_for_base_type(type);
  }
  
  /** Generic unset register constructor. */
@@@ -329,6 -331,8 +331,8 @@@ vec4_visitor::implied_mrf_writes(vec4_i
     case SHADER_OPCODE_TXS:
     case SHADER_OPCODE_TG4:
     case SHADER_OPCODE_TG4_OFFSET:
+    case SHADER_OPCODE_SAMPLEINFO:
+    case VS_OPCODE_GET_BUFFER_SIZE:
        return inst->header_size;
     default:
        unreachable("not reached");
@@@ -938,10 -942,18 +942,18 @@@ vec4_visitor::opt_set_dependency_contro
  }
  
  bool
- vec4_instruction::can_reswizzle(int dst_writemask,
+ vec4_instruction::can_reswizzle(const struct brw_device_info *devinfo,
+                                 int dst_writemask,
                                  int swizzle,
                                  int swizzle_mask)
  {
+    /* Gen6 MATH instructions can not execute in align16 mode, so swizzles
+     * or writemasking are not allowed.
+     */
+    if (devinfo->gen == 6 && is_math() &&
+        (swizzle != BRW_SWIZZLE_XYZW || dst_writemask != WRITEMASK_XYZW))
+       return false;
     /* If this instruction sets anything not referenced by swizzle, then we'd
      * totally break it when we reswizzle.
      */
     if (mlen > 0)
        return false;
  
+    /* We can't use swizzles on the accumulator and that's really the only
+     * HW_REG we would care to reswizzle so just disallow them all.
+     */
+    for (int i = 0; i < 3; i++) {
+       if (src[i].file == HW_REG)
+          return false;
+    }
     return true;
  }
  
@@@ -1010,6 -1030,28 +1030,28 @@@ vec4_visitor::opt_register_coalesce(
          inst->src[0].abs || inst->src[0].negate || inst->src[0].reladdr)
         continue;
  
+       /* Remove no-op MOVs */
+       if (inst->dst.file == inst->src[0].file &&
+           inst->dst.reg == inst->src[0].reg &&
+           inst->dst.reg_offset == inst->src[0].reg_offset) {
+          bool is_nop_mov = true;
+          for (unsigned c = 0; c < 4; c++) {
+             if ((inst->dst.writemask & (1 << c)) == 0)
+                continue;
+             if (BRW_GET_SWZ(inst->src[0].swizzle, c) != c) {
+                is_nop_mov = false;
+                break;
+             }
+          }
+          if (is_nop_mov) {
+             inst->remove(block);
+             continue;
+          }
+       }
        bool to_mrf = (inst->dst.file == MRF);
  
        /* Can't coalesce this GRF if someone else was going to
                 }
              }
  
+             /* This doesn't handle saturation on the instruction we
+              * want to coalesce away if the register types do not match.
+              * But if scan_inst is a non type-converting 'mov', we can fix
+              * the types later.
+              */
+             if (inst->saturate &&
+                 inst->dst.type != scan_inst->dst.type &&
+                 !(scan_inst->opcode == BRW_OPCODE_MOV &&
+                   scan_inst->dst.type == scan_inst->src[0].type))
+                break;
              /* If we can't handle the swizzle, bail. */
-             if (!scan_inst->can_reswizzle(inst->dst.writemask,
+             if (!scan_inst->can_reswizzle(devinfo, inst->dst.writemask,
                                            inst->src[0].swizzle,
                                            chans_needed)) {
                 break;
         if (interfered)
            break;
  
-          /* If somebody else writes our destination here, we can't coalesce
-           * before that.
+          /* If somebody else writes the same channels of our destination here,
+           * we can't coalesce before that.
            */
-          if (inst->dst.in_range(scan_inst->dst, scan_inst->regs_written))
-           break;
+          if (inst->dst.in_range(scan_inst->dst, scan_inst->regs_written) &&
+              (inst->dst.writemask & scan_inst->dst.writemask) != 0) {
+             break;
+          }
  
           /* Check for reads of the register we're trying to coalesce into.  We
            * can't go rewriting instructions above that to put some other value
               scan_inst->dst.file = inst->dst.file;
               scan_inst->dst.reg = inst->dst.reg;
               scan_inst->dst.reg_offset = inst->dst.reg_offset;
+                if (inst->saturate &&
+                    inst->dst.type != scan_inst->dst.type) {
+                   /* If we have reached this point, scan_inst is a non
+                    * type-converting 'mov' and we can modify its register types
+                    * to match the ones in inst. Otherwise, we could have an
+                    * incorrect saturation result.
+                    */
+                   scan_inst->dst.type = inst->dst.type;
+                   scan_inst->src[0].type = inst->src[0].type;
+                }
               scan_inst->saturate |= inst->saturate;
            }
            scan_inst = (vec4_instruction *)scan_inst->next;
@@@ -1719,7 -1784,7 +1784,7 @@@ vec4_visitor::emit_shader_time_write(in
  }
  
  bool
- vec4_visitor::run(gl_clip_plane *clip_planes)
+ vec4_visitor::run()
  {
     bool use_vec4_nir =
        compiler->glsl_compiler_options[stage].NirOptions != NULL;
     }
     base_ir = NULL;
  
-    if (key->userclip_active && !prog->UsesClipDistanceOut)
-       setup_uniform_clipplane_values(clip_planes);
     emit_thread_end();
  
     calculate_cfg();
  
     setup_payload();
  
-    if (false) {
+    if (unlikely(INTEL_DEBUG & DEBUG_SPILL_VEC4)) {
        /* Debug of register spilling: Go spill everything. */
        const int grf_count = alloc.count;
        float spill_costs[alloc.count];
@@@ -1899,16 -1961,8 +1961,8 @@@ brw_vs_emit(struct brw_context *brw
              struct gl_shader_program *prog,
              unsigned *final_assembly_size)
  {
-    bool start_busy = false;
-    double start_time = 0;
     const unsigned *assembly = NULL;
  
-    if (unlikely(brw->perf_debug)) {
-       start_busy = (brw->batch.last_bo &&
-                     drm_intel_bo_busy(brw->batch.last_bo));
-       start_time = get_time();
-    }
     struct brw_shader *shader = NULL;
     if (prog)
        shader = (brw_shader *) prog->_LinkedShaders[MESA_SHADER_VERTEX];
     if (INTEL_DEBUG & DEBUG_SHADER_TIME)
        st_index = brw_get_shader_time_index(brw, prog, &vp->Base, ST_VS);
  
 -   if (unlikely(INTEL_DEBUG & DEBUG_VS))
 +   if (unlikely(INTEL_DEBUG & DEBUG_VS) && shader->base.ir)
        brw_dump_ir("vertex", prog, &shader->base, &vp->Base);
  
     if (!vp->Base.nir &&
        prog_data->base.dispatch_mode = DISPATCH_MODE_4X2_DUAL_OBJECT;
  
        vec4_vs_visitor v(brw->intelScreen->compiler, brw, key, prog_data,
-                         vp, prog, mem_ctx, st_index,
+                         vp, prog, brw_select_clip_planes(&brw->ctx),
+                         mem_ctx, st_index,
                          !_mesa_is_gles3(&brw->ctx));
-       if (!v.run(brw_select_clip_planes(&brw->ctx))) {
+       if (!v.run()) {
           if (prog) {
              prog->LinkStatus = false;
              ralloc_strcat(&prog->InfoLog, v.fail_msg);
        assembly = g.generate_assembly(v.cfg, final_assembly_size);
     }
  
-    if (unlikely(brw->perf_debug) && shader) {
-       if (shader->compiled_once) {
-          brw_vs_debug_recompile(brw, prog, key);
-       }
-       if (start_busy && !drm_intel_bo_busy(brw->batch.last_bo)) {
-          perf_debug("VS compile took %.03f ms and stalled the GPU\n",
-                     (get_time() - start_time) * 1000);
-       }
-       shader->compiled_once = true;
-    }
     return assembly;
  }
  
- void
- brw_vue_setup_prog_key_for_precompile(struct gl_context *ctx,
-                                       struct brw_vue_prog_key *key,
-                                       GLuint id, struct gl_program *prog)
- {
-    struct brw_context *brw = brw_context(ctx);
-    key->program_string_id = id;
-    brw_setup_tex_for_precompile(brw, &key->tex, prog);
- }
  } /* extern "C" */
index d5a24d823b5232c952d5d87ca2116488745ae5b5,94906d2e70505d6d34d1221703edad9194531fce..9d56f9a1ae0d3a00ddcb0bc9768369f4aad60442
  
  #include "brw_nir.h"
  #include "brw_vec4.h"
+ #include "brw_vec4_builder.h"
+ #include "brw_vec4_surface_builder.h"
  #include "glsl/ir_uniform.h"
  
+ using namespace brw;
+ using namespace brw::surface_access;
  namespace brw {
  
  void
@@@ -58,25 -63,24 +63,24 @@@ vec4_visitor::nir_setup_system_value_in
        unreachable("should be lowered by lower_vertex_id().");
  
     case nir_intrinsic_load_vertex_id_zero_base:
-       reg = &this->nir_system_values[SYSTEM_VALUE_VERTEX_ID_ZERO_BASE];
+       reg = &nir_system_values[SYSTEM_VALUE_VERTEX_ID_ZERO_BASE];
        if (reg->file == BAD_FILE)
-          *reg =
-             *this->make_reg_for_system_value(SYSTEM_VALUE_VERTEX_ID_ZERO_BASE,
-                                              glsl_type::int_type);
+          *reg = *make_reg_for_system_value(SYSTEM_VALUE_VERTEX_ID_ZERO_BASE,
+                                            glsl_type::int_type);
        break;
  
     case nir_intrinsic_load_base_vertex:
-       reg = &this->nir_system_values[SYSTEM_VALUE_BASE_VERTEX];
+       reg = &nir_system_values[SYSTEM_VALUE_BASE_VERTEX];
        if (reg->file == BAD_FILE)
-          *reg = *this->make_reg_for_system_value(SYSTEM_VALUE_BASE_VERTEX,
-                                                  glsl_type::int_type);
+          *reg = *make_reg_for_system_value(SYSTEM_VALUE_BASE_VERTEX,
+                                            glsl_type::int_type);
        break;
  
     case nir_intrinsic_load_instance_id:
-       reg = &this->nir_system_values[SYSTEM_VALUE_INSTANCE_ID];
+       reg = &nir_system_values[SYSTEM_VALUE_INSTANCE_ID];
        if (reg->file == BAD_FILE)
-          *reg = *this->make_reg_for_system_value(SYSTEM_VALUE_INSTANCE_ID,
-                                                  glsl_type::int_type);
+          *reg = *make_reg_for_system_value(SYSTEM_VALUE_INSTANCE_ID,
+                                            glsl_type::int_type);
        break;
  
     default:
@@@ -142,7 -146,7 +146,7 @@@ vec4_visitor::nir_setup_uniforms(nir_sh
           }
  
           assert(uniforms < uniform_array_size);
-          this->uniform_size[uniforms] = type_size_vec4(var->type);
+          uniform_size[uniforms] = type_size_vec4(var->type);
  
           if (strncmp(var->name, "gl_", 3) == 0)
              nir_setup_builtin_uniform(var);
               strcmp(var->name, "parameters") == 0);
  
        assert(uniforms < uniform_array_size);
-       this->uniform_size[uniforms] = type_size_vec4(var->type);
+       uniform_size[uniforms] = type_size_vec4(var->type);
  
        struct gl_program_parameter_list *plist = prog->Parameters;
        for (unsigned p = 0; p < plist->NumParameters; p++) {
@@@ -243,10 -247,10 +247,10 @@@ vec4_visitor::nir_setup_builtin_uniform
         * ParameterValues directly, since unlike brw_fs.cpp, we never
         * add new state references during compile.
         */
-       int index = _mesa_add_state_reference(this->prog->Parameters,
+       int index = _mesa_add_state_reference(prog->Parameters,
                                            (gl_state_index *)slots[i].tokens);
        gl_constant_value *values =
-          &this->prog->Parameters->ParameterValues[index][0];
+          &prog->Parameters->ParameterValues[index][0];
  
        assert(uniforms < uniform_array_size);
  
           stage_prog_data->param[uniforms * 4 + j] =
              &values[GET_SWZ(slots[i].swizzle, j)];
  
-       this->uniform_vector_size[uniforms] =
+       uniform_vector_size[uniforms] =
           (var->type->is_scalar() || var->type->is_vector() ||
            var->type->is_matrix() ? var->type->vector_elements : 4);
  
@@@ -344,7 -348,7 +348,7 @@@ vec4_visitor::nir_emit_block(nir_block 
  void
  vec4_visitor::nir_emit_instr(nir_instr *instr)
  {
-    this->base_ir = instr;
+    base_ir = instr;
  
     switch (instr->type) {
     case nir_instr_type_load_const:
        nir_emit_texture(nir_instr_as_tex(instr));
        break;
  
+    case nir_instr_type_ssa_undef:
+       nir_emit_undef(nir_instr_as_ssa_undef(instr));
+       break;
     default:
        fprintf(stderr, "VS instruction not yet implemented by NIR->vec4\n");
        break;
@@@ -393,9 -401,14 +401,14 @@@ dst_reg_for_nir_reg(vec4_visitor *v, ni
  dst_reg
  vec4_visitor::get_nir_dest(nir_dest dest)
  {
-    assert(!dest.is_ssa);
-    return dst_reg_for_nir_reg(this, dest.reg.reg, dest.reg.base_offset,
-                               dest.reg.indirect);
+    if (dest.is_ssa) {
+       dst_reg dst = dst_reg(GRF, alloc.allocate(1));
+       nir_ssa_values[dest.ssa.index] = dst;
+       return dst;
+    } else {
+       return dst_reg_for_nir_reg(this, dest.reg.reg, dest.reg.base_offset,
+                                  dest.reg.indirect);
+    }
  }
  
  dst_reg
@@@ -450,7 -463,7 +463,7 @@@ voi
  vec4_visitor::nir_emit_load_const(nir_load_const_instr *instr)
  {
     dst_reg reg = dst_reg(GRF, alloc.allocate(1));
-    reg.type =  BRW_REGISTER_TYPE_F;
+    reg.type =  BRW_REGISTER_TYPE_D;
  
     unsigned remaining = brw_writemask_for_size(instr->def.num_components);
  
        }
  
        reg.writemask = writemask;
-       emit(MOV(reg, src_reg(instr->value.f[i])));
+       emit(MOV(reg, src_reg(instr->value.i[i])));
  
        remaining &= ~writemask;
     }
@@@ -530,33 -543,271 +543,271 @@@ vec4_visitor::nir_emit_intrinsic(nir_in
        break;
     }
  
-    case nir_intrinsic_load_vertex_id:
-       unreachable("should be lowered by lower_vertex_id()");
+    case nir_intrinsic_get_buffer_size: {
+       nir_const_value *const_uniform_block = nir_src_as_const_value(instr->src[0]);
+       unsigned ubo_index = const_uniform_block ? const_uniform_block->u[0] : 0;
+       assert(shader->base.UniformBlocks[ubo_index].IsShaderStorage);
+       src_reg surf_index = src_reg(prog_data->base.binding_table.ubo_start +
+                                    ubo_index);
+       dst_reg result_dst = get_nir_dest(instr->dest);
+       vec4_instruction *inst = new(mem_ctx)
+          vec4_instruction(VS_OPCODE_GET_BUFFER_SIZE, result_dst);
+       inst->base_mrf = 2;
+       inst->mlen = 1; /* always at least one */
+       inst->src[1] = src_reg(surf_index);
+       /* MRF for the first parameter */
+       src_reg lod = src_reg(0);
+       int param_base = inst->base_mrf;
+       int writemask = WRITEMASK_X;
+       emit(MOV(dst_reg(MRF, param_base, glsl_type::int_type, writemask), lod));
+       emit(inst);
+       break;
+    }
+    case nir_intrinsic_store_ssbo_indirect:
+       has_indirect = true;
+       /* fallthrough */
+    case nir_intrinsic_store_ssbo: {
+       assert(devinfo->gen >= 7);
+       /* Block index */
+       src_reg surf_index;
+       nir_const_value *const_uniform_block =
+          nir_src_as_const_value(instr->src[1]);
+       if (const_uniform_block) {
+          unsigned index = prog_data->base.binding_table.ubo_start +
+                           const_uniform_block->u[0];
+          surf_index = src_reg(index);
+          brw_mark_surface_used(&prog_data->base, index);
+       } else {
+          surf_index = src_reg(this, glsl_type::uint_type);
+          emit(ADD(dst_reg(surf_index), get_nir_src(instr->src[1], 1),
+                   src_reg(prog_data->base.binding_table.ubo_start)));
+          surf_index = emit_uniformize(surf_index);
+          brw_mark_surface_used(&prog_data->base,
+                                prog_data->base.binding_table.ubo_start +
+                                shader_prog->NumBufferInterfaceBlocks - 1);
+       }
+       /* Offset */
+       src_reg offset_reg = src_reg(this, glsl_type::uint_type);
+       unsigned const_offset_bytes = 0;
+       if (has_indirect) {
+          emit(MOV(dst_reg(offset_reg), get_nir_src(instr->src[2], 1)));
+       } else {
+          const_offset_bytes = instr->const_index[0];
+          emit(MOV(dst_reg(offset_reg), src_reg(const_offset_bytes)));
+       }
+       /* Value */
+       src_reg val_reg = get_nir_src(instr->src[0], 4);
+       /* Writemask */
+       unsigned write_mask = instr->const_index[1];
+       /* IvyBridge does not have a native SIMD4x2 untyped write message so untyped
+        * writes will use SIMD8 mode. In order to hide this and keep symmetry across
+        * typed and untyped messages and across hardware platforms, the
+        * current implementation of the untyped messages will transparently convert
+        * the SIMD4x2 payload into an equivalent SIMD8 payload by transposing it
+        * and enabling only channel X on the SEND instruction.
+        *
+        * The above works well for full vector writes, but not for partial writes
+        * where we want to write some channels and not others, like when we have
+        * code such as v.xyw = vec3(1,2,4). Because the untyped write messages are
+        * quite restrictive with regards to the channel enables we can configure in
+        * the message descriptor (not all combinations are allowed) we cannot simply
+        * implement these scenarios with a single message while keeping the
+        * aforementioned symmetry in the implementation. For now we have decided that
+        * it is better to keep the symmetry to reduce complexity, so in situations
+        * such as the one described we end up emitting two untyped write messages
+        * (one for xy and another for w).
+        *
+        * The code below packs consecutive channels into a single write message,
+        * detects gaps in the vector write and if needed, sends a second message
+        * with the remaining channels. If in the future we decide that we want to
+        * emit a single message at the expense of losing the symmetry in the
+        * implementation we can:
+        *
+        * 1) For IvyBridge: Only use the red channel of the untyped write SIMD8
+        *    message payload. In this mode we can write up to 8 offsets and dwords
+        *    to the red channel only (for the two vec4s in the SIMD4x2 execution)
+        *    and select which of the 8 channels carry data to write by setting the
+        *    appropriate writemask in the dst register of the SEND instruction.
+        *    It would require writing a new generator opcode specifically for
+        *    IvyBridge since we would need to prepare a SIMD8 payload that could
+        *    use any channel, not just X.
+        *
+        * 2) For Haswell+: Simply send a single write message but set the writemask
+        *    on the dst of the SEND instruction to select the channels we want to
+        *    write. It would require modifying the current messages to receive
+        *    and honor the writemask provided.
+        */
+       const vec4_builder bld = vec4_builder(this).at_end()
+                                .annotate(current_annotation, base_ir);
+       int swizzle[4] = { 0, 0, 0, 0};
+       int num_channels = 0;
+       unsigned skipped_channels = 0;
+       int num_components = instr->num_components;
+       for (int i = 0; i < num_components; i++) {
+          /* Check if this channel needs to be written. If so, record the
+           * channel we need to take the data from in the swizzle array
+           */
+          int component_mask = 1 << i;
+          int write_test = write_mask & component_mask;
+          if (write_test)
+             swizzle[num_channels++] = i;
+          /* If we don't have to write this channel it means we have a gap in the
+           * vector, so write the channels we accumulated until now, if any. Do
+           * the same if this was the last component in the vector.
+           */
+          if (!write_test || i == num_components - 1) {
+             if (num_channels > 0) {
+                /* We have channels to write, so update the offset we need to
+                 * write at to skip the channels we skipped, if any.
+                 */
+                if (skipped_channels > 0) {
+                   if (!has_indirect) {
+                      const_offset_bytes += 4 * skipped_channels;
+                      offset_reg = src_reg(const_offset_bytes);
+                   } else {
+                      emit(ADD(dst_reg(offset_reg), offset_reg,
+                               brw_imm_ud(4 * skipped_channels)));
+                   }
+                }
+                /* Swizzle the data register so we take the data from the channels
+                 * we need to write and send the write message. This will write
+                 * num_channels consecutive dwords starting at offset.
+                 */
+                val_reg.swizzle =
+                   BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
+                emit_untyped_write(bld, surf_index, offset_reg, val_reg,
+                                   1 /* dims */, num_channels /* size */,
+                                   BRW_PREDICATE_NONE);
+                /* If we have to do a second write we will have to update the
+                 * offset so that we jump over the channels we have just written
+                 * now.
+                 */
+                skipped_channels = num_channels;
+                /* Restart the count for the next write message */
+                num_channels = 0;
+             }
+             /* We did not write the current channel, so increase skipped count */
+             skipped_channels++;
+          }
+       }
  
-    case nir_intrinsic_load_vertex_id_zero_base: {
-       src_reg vertex_id =
-          src_reg(nir_system_values[SYSTEM_VALUE_VERTEX_ID_ZERO_BASE]);
-       assert(vertex_id.file != BAD_FILE);
-       dest = get_nir_dest(instr->dest, vertex_id.type);
-       emit(MOV(dest, vertex_id));
        break;
     }
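The packing strategy documented in the comment above can be illustrated in isolation. The helper below is a hypothetical sketch, not part of this patch: it shows how a writemask such as the one for v.xyw = vec3(1,2,4) splits into two runs (.xy and .w), each of which becomes one untyped write message.

    #include <stdbool.h>
    #include <stdio.h>

    /* Illustrative sketch only: split a 4-bit writemask into runs of
     * consecutive channels; each run corresponds to one emit_untyped_write().
     */
    static void
    print_write_runs(unsigned write_mask, int num_components)
    {
       int run_start = -1;
       for (int i = 0; i <= num_components; i++) {
          bool set = i < num_components && (write_mask & (1 << i));
          if (set && run_start < 0) {
             run_start = i;                      /* a new run begins */
          } else if (!set && run_start >= 0) {
             printf("message: %d channel(s) at dword offset %d\n",
                    i - run_start, run_start);   /* the run ends: one message */
             run_start = -1;
          }
       }
    }

Calling print_write_runs(0xb, 4) for the .xyw case prints two messages: two channels at offset 0 and one channel at offset 3.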
  
-    case nir_intrinsic_load_base_vertex: {
-       src_reg base_vertex =
-          src_reg(nir_system_values[SYSTEM_VALUE_BASE_VERTEX]);
-       assert(base_vertex.file != BAD_FILE);
-       dest = get_nir_dest(instr->dest, base_vertex.type);
-       emit(MOV(dest, base_vertex));
+    case nir_intrinsic_load_ssbo_indirect:
+       has_indirect = true;
+       /* fallthrough */
+    case nir_intrinsic_load_ssbo: {
+       assert(devinfo->gen >= 7);
+       nir_const_value *const_uniform_block =
+          nir_src_as_const_value(instr->src[0]);
+       src_reg surf_index;
+       if (const_uniform_block) {
+          unsigned index = prog_data->base.binding_table.ubo_start +
+                           const_uniform_block->u[0];
+          surf_index = src_reg(index);
+          brw_mark_surface_used(&prog_data->base, index);
+       } else {
+          surf_index = src_reg(this, glsl_type::uint_type);
+          emit(ADD(dst_reg(surf_index), get_nir_src(instr->src[0], 1),
+                   src_reg(prog_data->base.binding_table.ubo_start)));
+          surf_index = emit_uniformize(surf_index);
+          /* Assume this may touch any UBO. It would be nice to provide
+           * a tighter bound, but the array information is already lowered away.
+           */
+          brw_mark_surface_used(&prog_data->base,
+                                prog_data->base.binding_table.ubo_start +
+                                shader_prog->NumBufferInterfaceBlocks - 1);
+       }
+       src_reg offset_reg = src_reg(this, glsl_type::uint_type);
+       unsigned const_offset_bytes = 0;
+       if (has_indirect) {
+          emit(MOV(dst_reg(offset_reg), get_nir_src(instr->src[1], 1)));
+       } else {
+          const_offset_bytes = instr->const_index[0];
+          emit(MOV(dst_reg(offset_reg), src_reg(const_offset_bytes)));
+       }
+       /* Read the vector */
+       const vec4_builder bld = vec4_builder(this).at_end()
+          .annotate(current_annotation, base_ir);
+       src_reg read_result = emit_untyped_read(bld, surf_index, offset_reg,
+                                               1 /* dims */, 4 /* size*/,
+                                               BRW_PREDICATE_NONE);
+       dst_reg dest = get_nir_dest(instr->dest);
+       read_result.type = dest.type;
+       read_result.swizzle = brw_swizzle_for_size(instr->num_components);
+       emit(MOV(dest, read_result));
        break;
     }
  
+    case nir_intrinsic_ssbo_atomic_add:
+       nir_emit_ssbo_atomic(BRW_AOP_ADD, instr);
+       break;
+    case nir_intrinsic_ssbo_atomic_min:
+       if (dest.type == BRW_REGISTER_TYPE_D)
+          nir_emit_ssbo_atomic(BRW_AOP_IMIN, instr);
+       else
+          nir_emit_ssbo_atomic(BRW_AOP_UMIN, instr);
+       break;
+    case nir_intrinsic_ssbo_atomic_max:
+       if (dest.type == BRW_REGISTER_TYPE_D)
+          nir_emit_ssbo_atomic(BRW_AOP_IMAX, instr);
+       else
+          nir_emit_ssbo_atomic(BRW_AOP_UMAX, instr);
+       break;
+    case nir_intrinsic_ssbo_atomic_and:
+       nir_emit_ssbo_atomic(BRW_AOP_AND, instr);
+       break;
+    case nir_intrinsic_ssbo_atomic_or:
+       nir_emit_ssbo_atomic(BRW_AOP_OR, instr);
+       break;
+    case nir_intrinsic_ssbo_atomic_xor:
+       nir_emit_ssbo_atomic(BRW_AOP_XOR, instr);
+       break;
+    case nir_intrinsic_ssbo_atomic_exchange:
+       nir_emit_ssbo_atomic(BRW_AOP_MOV, instr);
+       break;
+    case nir_intrinsic_ssbo_atomic_comp_swap:
+       nir_emit_ssbo_atomic(BRW_AOP_CMPWR, instr);
+       break;
+    case nir_intrinsic_load_vertex_id:
+       unreachable("should be lowered by lower_vertex_id()");
+    case nir_intrinsic_load_vertex_id_zero_base:
+    case nir_intrinsic_load_base_vertex:
     case nir_intrinsic_load_instance_id: {
-       src_reg instance_id =
-          src_reg(nir_system_values[SYSTEM_VALUE_INSTANCE_ID]);
-       assert(instance_id.file != BAD_FILE);
-       dest = get_nir_dest(instr->dest, instance_id.type);
-       emit(MOV(dest, instance_id));
+       gl_system_value sv = nir_system_value_from_intrinsic(instr->intrinsic);
+       src_reg val = src_reg(nir_system_values[sv]);
+       assert(val.file != BAD_FILE);
+       dest = get_nir_dest(instr->dest, val.type);
+       emit(MOV(dest, val));
        break;
     }
  
        has_indirect = true;
        /* fallthrough */
     case nir_intrinsic_load_ubo: {
 +      const uint32_t set = instr->const_index[0];
        nir_const_value *const_block_index = nir_src_as_const_value(instr->src[0]);
        src_reg surf_index;
  
        dest = get_nir_dest(instr->dest);
  
        if (const_block_index) {
 +         uint32_t binding = const_block_index->u[0];
 +
           /* The block index is a constant, so just emit the binding table entry
            * as an immediate.
            */
 -         surf_index = src_reg(prog_data->base.binding_table.ubo_start +
 -                              const_block_index->u[0]);
 +         surf_index = src_reg(stage_prog_data->bind_map[set].index[binding]);
        } else {
           /* The block index is not a constant. Evaluate the index expression
            * per-channel and add the base UBO index; we have to select a value
            */
           brw_mark_surface_used(&prog_data->base,
                                 prog_data->base.binding_table.ubo_start +
-                                shader_prog->NumUniformBlocks - 1);
+                                shader_prog->NumBufferInterfaceBlocks - 1);
        }
  
 -      unsigned const_offset = instr->const_index[0];
 +      unsigned const_offset = instr->const_index[1];
        src_reg offset;
  
        if (!has_indirect)  {
     }
  }
  
+ void
+ vec4_visitor::nir_emit_ssbo_atomic(int op, nir_intrinsic_instr *instr)
+ {
+    dst_reg dest;
+    if (nir_intrinsic_infos[instr->intrinsic].has_dest)
+       dest = get_nir_dest(instr->dest);
+    src_reg surface;
+    nir_const_value *const_surface = nir_src_as_const_value(instr->src[0]);
+    if (const_surface) {
+       unsigned surf_index = prog_data->base.binding_table.ubo_start +
+                             const_surface->u[0];
+       surface = src_reg(surf_index);
+       brw_mark_surface_used(&prog_data->base, surf_index);
+    } else {
+       surface = src_reg(this, glsl_type::uint_type);
+       emit(ADD(dst_reg(surface), get_nir_src(instr->src[0]),
+                src_reg(prog_data->base.binding_table.ubo_start)));
+       /* Assume this may touch any UBO. This is the same as we do for other
+        * UBO/SSBO accesses with a non-constant surface.
+        */
+       brw_mark_surface_used(&prog_data->base,
+                             prog_data->base.binding_table.ubo_start +
+                             shader_prog->NumBufferInterfaceBlocks - 1);
+    }
+    src_reg offset = get_nir_src(instr->src[1], 1);
+    src_reg data1 = get_nir_src(instr->src[2], 1);
+    src_reg data2;
+    if (op == BRW_AOP_CMPWR)
+       data2 = get_nir_src(instr->src[3], 1);
+    /* Emit the actual atomic operation */
+    const vec4_builder bld =
+       vec4_builder(this).at_end().annotate(current_annotation, base_ir);
+    src_reg atomic_result =
+       surface_access::emit_untyped_atomic(bld, surface, offset,
+                                           data1, data2,
+                                           1 /* dims */, 1 /* rsize */,
+                                           op,
+                                           BRW_PREDICATE_NONE);
+    dest.type = atomic_result.type;
+    bld.MOV(dest, atomic_result);
+ }
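For reference, the op selection performed by the SSBO atomic cases in nir_emit_intrinsic() above boils down to the mapping sketched below. This is not part of the patch and assumes the nir.h and brw_defines.h headers from this tree; min/max pick the signed or unsigned hardware op from the destination type, the rest map one-to-one.

    /* Sketch of the NIR intrinsic to BRW_AOP_* mapping used above. */
    static int
    brw_aop_for_ssbo_atomic(nir_intrinsic_op op, bool dest_is_signed)
    {
       switch (op) {
       case nir_intrinsic_ssbo_atomic_add:       return BRW_AOP_ADD;
       case nir_intrinsic_ssbo_atomic_min:       return dest_is_signed ? BRW_AOP_IMIN
                                                                       : BRW_AOP_UMIN;
       case nir_intrinsic_ssbo_atomic_max:       return dest_is_signed ? BRW_AOP_IMAX
                                                                       : BRW_AOP_UMAX;
       case nir_intrinsic_ssbo_atomic_and:       return BRW_AOP_AND;
       case nir_intrinsic_ssbo_atomic_or:        return BRW_AOP_OR;
       case nir_intrinsic_ssbo_atomic_xor:       return BRW_AOP_XOR;
       case nir_intrinsic_ssbo_atomic_exchange:  return BRW_AOP_MOV;
       case nir_intrinsic_ssbo_atomic_comp_swap: return BRW_AOP_CMPWR;
       default:
          unreachable("not an SSBO atomic intrinsic");
       }
    }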
  static unsigned
  brw_swizzle_for_nir_swizzle(uint8_t swizzle[4])
  {
@@@ -1263,21 -1559,26 +1561,26 @@@ vec4_visitor::nir_emit_alu(nir_alu_inst
        inst->predicate = BRW_PREDICATE_NORMAL;
        break;
  
-    case nir_op_fdot2:
+    case nir_op_fdot_replicated2:
        inst = emit(BRW_OPCODE_DP2, dst, op[0], op[1]);
        inst->saturate = instr->dest.saturate;
        break;
  
-    case nir_op_fdot3:
+    case nir_op_fdot_replicated3:
        inst = emit(BRW_OPCODE_DP3, dst, op[0], op[1]);
        inst->saturate = instr->dest.saturate;
        break;
  
-    case nir_op_fdot4:
+    case nir_op_fdot_replicated4:
        inst = emit(BRW_OPCODE_DP4, dst, op[0], op[1]);
        inst->saturate = instr->dest.saturate;
        break;
  
+    case nir_op_fdph_replicated:
+       inst = emit(BRW_OPCODE_DPH, dst, op[0], op[1]);
+       inst->saturate = instr->dest.saturate;
+       break;
     case nir_op_bany2:
     case nir_op_bany3:
     case nir_op_bany4: {
@@@ -1355,6 -1656,7 +1658,7 @@@ ir_texture_opcode_for_nir_texop(nir_tex
     switch (texop) {
     case nir_texop_lod: op = ir_lod; break;
     case nir_texop_query_levels: op = ir_query_levels; break;
+    case nir_texop_texture_samples: op = ir_texture_samples; break;
     case nir_texop_tex: op = ir_tex; break;
     case nir_texop_tg4: op = ir_tg4; break;
     case nir_texop_txb: op = ir_txb; break;
@@@ -1411,7 -1713,7 +1715,7 @@@ vec4_visitor::nir_emit_texture(nir_tex_
      * emitting anything other than setting up the constant result.
      */
     if (instr->op == nir_texop_tg4) {
-       int swiz = GET_SWZ(key->tex.swizzles[sampler], instr->component);
+       int swiz = GET_SWZ(key_tex->swizzles[sampler], instr->component);
        if (swiz == SWIZZLE_ZERO || swiz == SWIZZLE_ONE) {
           emit(MOV(dest, src_reg(swiz == SWIZZLE_ONE ? 1.0f : 0.0f)));
           return;
           sample_index = get_nir_src(instr->src[i].src, BRW_REGISTER_TYPE_D, 1);
           assert(coord_type != NULL);
           if (devinfo->gen >= 7 &&
-              key->tex.compressed_multisample_layout_mask & (1<<sampler)) {
+              key_tex->compressed_multisample_layout_mask & (1 << sampler)) {
              mcs = emit_mcs_fetch(coord_type, coordinate, sampler_reg);
           } else {
              mcs = src_reg(0u);
                  mcs, is_cube_array, sampler, sampler_reg);
  }
  
+ void
+ vec4_visitor::nir_emit_undef(nir_ssa_undef_instr *instr)
+ {
+    nir_ssa_values[instr->def.index] = dst_reg(GRF, alloc.allocate(1));
+ }
  }
index 499f6288aeec1295a583ecde6f02951cf215ee99,3cb783ece1c675c447028469d90323730436d591..6d61112056ca2bd3dc81b7f204cdee9aeda665d7
@@@ -26,6 -26,8 +26,8 @@@
  #include "glsl/ir_uniform.h"
  #include "program/sampler.h"
  
+ #define FIRST_SPILL_MRF(gen) (gen == 6 ? 21 : 13)
  namespace brw {
  
  vec4_instruction::vec4_instruction(enum opcode opcode, const dst_reg &dst,
@@@ -256,7 -258,7 +258,7 @@@ vec4_visitor::SCRATCH_READ(const dst_re
  
     inst = new(mem_ctx) vec4_instruction(SHADER_OPCODE_GEN4_SCRATCH_READ,
                                        dst, index);
-    inst->base_mrf = 14;
+    inst->base_mrf = FIRST_SPILL_MRF(devinfo->gen) + 1;
     inst->mlen = 2;
  
     return inst;
@@@ -270,7 -272,7 +272,7 @@@ vec4_visitor::SCRATCH_WRITE(const dst_r
  
     inst = new(mem_ctx) vec4_instruction(SHADER_OPCODE_GEN4_SCRATCH_WRITE,
                                        dst, src, index);
-    inst->base_mrf = 13;
+    inst->base_mrf = FIRST_SPILL_MRF(devinfo->gen);
     inst->mlen = 3;
  
     return inst;
@@@ -643,7 -645,6 +645,7 @@@ type_size_vec4(const struct glsl_type *
     case GLSL_TYPE_DOUBLE:
     case GLSL_TYPE_ERROR:
     case GLSL_TYPE_INTERFACE:
 +   case GLSL_TYPE_FUNCTION:
        unreachable("not reached");
     }
  
@@@ -756,22 -757,6 +758,6 @@@ vec4_visitor::setup_uniform_values(ir_v
     }
  }
  
- void
- vec4_visitor::setup_uniform_clipplane_values(gl_clip_plane *clip_planes)
- {
-    for (int i = 0; i < key->nr_userclip_plane_consts; ++i) {
-       assert(this->uniforms < uniform_array_size);
-       this->uniform_vector_size[this->uniforms] = 4;
-       this->userplane[i] = dst_reg(UNIFORM, this->uniforms);
-       this->userplane[i].type = BRW_REGISTER_TYPE_F;
-       for (int j = 0; j < 4; ++j) {
-          stage_prog_data->param[this->uniforms * 4 + j] =
-             (gl_constant_value *) &clip_planes[i][j];
-       }
-       ++this->uniforms;
-    }
- }
  /* Our support for builtin uniforms is even scarier than non-builtin.
   * It sits on top of the PROG_STATE_VAR parameters that are
   * automatically updated from GL context state.
@@@ -1089,11 -1074,12 +1075,12 @@@ vec4_visitor::visit(ir_variable *ir
        break;
  
     case ir_var_uniform:
+    case ir_var_shader_storage:
        reg = new(this->mem_ctx) dst_reg(UNIFORM, this->uniforms);
  
        /* Thanks to the lower_ubo_reference pass, we will see only
-        * ir_binop_ubo_load expressions and not ir_dereference_variable for UBO
-        * variables, so no need for them to be in variable_ht.
+        * ir_binop_{ubo,ssbo}_load expressions and not ir_dereference_variable
+        * for UBO/SSBO variables, so no need for them to be in variable_ht.
         *
         * Some uniforms, such as samplers and atomic counters, have no actual
         * storage, so we should ignore them.
@@@ -1401,7 -1387,7 +1388,7 @@@ vec4_visitor::emit_pull_constant_load_r
                                             dst,
                                             surf_index,
                                             offset_reg);
-       pull->base_mrf = 14;
+       pull->base_mrf = FIRST_SPILL_MRF(devinfo->gen) + 1;
        pull->mlen = 1;
     }
  
@@@ -1599,6 -1585,10 +1586,10 @@@ vec4_visitor::visit(ir_expression *ir
        emit(MOV(result_dst, op[0]));
        break;
  
+    case ir_unop_ssbo_unsized_array_length:
+       unreachable("not reached: should be handled by lower_ubo_reference");
+       break;
     case ir_binop_add:
        emit(ADD(result_dst, op[0], op[1]));
        break;
        emit(RNDE(result_dst, op[0]));
        break;
  
+    case ir_unop_get_buffer_size:
+       unreachable("not reached: not implemented");
+       break;
     case ir_binop_min:
        emit_minmax(BRW_CONDITIONAL_L, result_dst, op[0], op[1]);
        break;
            */
           brw_mark_surface_used(&prog_data->base,
                                 prog_data->base.binding_table.ubo_start +
-                                shader_prog->NumUniformBlocks - 1);
+                                shader_prog->NumBufferInterfaceBlocks - 1);
        }
  
        if (const_offset_ir) {
@@@ -2567,6 -2561,7 +2562,7 @@@ vec4_visitor::emit_texture(ir_texture_o
     case ir_tg4: opcode = offset_value.file != BAD_FILE
                           ? SHADER_OPCODE_TG4_OFFSET : SHADER_OPCODE_TG4; break;
     case ir_query_levels: opcode = SHADER_OPCODE_TXS; break;
+    case ir_texture_samples: opcode = SHADER_OPCODE_SAMPLEINFO; break;
     case ir_txb:
        unreachable("TXB is not valid for vertex shaders.");
     case ir_lod:
      * - Texel offsets
      * - Gather channel selection
      * - Sampler indices too large to fit in a 4-bit value.
+     * - Sampleinfo message - takes no parameters, but mlen = 0 is illegal
      */
     inst->header_size =
        (devinfo->gen < 5 || devinfo->gen >= 9 ||
         inst->offset != 0 || op == ir_tg4 ||
+        op == ir_texture_samples ||
         is_high_sampler(sampler_reg)) ? 1 : 0;
     inst->base_mrf = 2;
-    inst->mlen = inst->header_size + 1; /* always at least one */
+    inst->mlen = inst->header_size;
     inst->dst.writemask = WRITEMASK_XYZW;
     inst->shadow_compare = shadow_comparitor.file != BAD_FILE;
  
     if (op == ir_txs || op == ir_query_levels) {
        int writemask = devinfo->gen == 4 ? WRITEMASK_W : WRITEMASK_X;
        emit(MOV(dst_reg(MRF, param_base, lod.type, writemask), lod));
+       inst->mlen++;
+    } else if (op == ir_texture_samples) {
+       inst->dst.writemask = WRITEMASK_X;
     } else {
        /* Load the coordinate */
        /* FINISHME: gl_clamp_mask and saturate */
  
        emit(MOV(dst_reg(MRF, param_base, coordinate.type, coord_mask),
                 coordinate));
+       inst->mlen++;
  
        if (zero_mask != 0) {
           emit(MOV(dst_reg(MRF, param_base, coordinate.type, zero_mask),
            mrf = param_base;
            writemask = WRITEMASK_W;
         }
-          lod.swizzle = BRW_SWIZZLE_XXXX;
         emit(MOV(dst_reg(MRF, mrf, lod.type, writemask), lod));
        } else if (op == ir_txf) {
           emit(MOV(dst_reg(MRF, param_base, lod.type, WRITEMASK_W), lod));
     }
  
     if (devinfo->gen == 6 && op == ir_tg4) {
-       emit_gen6_gather_wa(key->tex.gen6_gather_wa[sampler], inst->dst);
+       emit_gen6_gather_wa(key_tex->gen6_gather_wa[sampler], inst->dst);
     }
  
     swizzle_result(op, dest,
@@@ -2762,7 -2762,7 +2763,7 @@@ vec4_visitor::visit(ir_texture *ir
      */
     if (ir->op == ir_tg4) {
        ir_constant *chan = ir->lod_info.component->as_constant();
-       int swiz = GET_SWZ(key->tex.swizzles[sampler], chan->value.i[0]);
+       int swiz = GET_SWZ(key_tex->swizzles[sampler], chan->value.i[0]);
        if (swiz == SWIZZLE_ZERO || swiz == SWIZZLE_ONE) {
           dst_reg result(this, ir->type);
           this->result = src_reg(result);
        ir->lod_info.sample_index->accept(this);
        sample_index = this->result;
  
-       if (devinfo->gen >= 7 && key->tex.compressed_multisample_layout_mask & (1<<sampler))
+       if (devinfo->gen >= 7 && key_tex->compressed_multisample_layout_mask & (1 << sampler))
           mcs = emit_mcs_fetch(ir->coordinate->type, coordinate, sampler_reg);
        else
           mcs = src_reg(0u);
     case ir_txb:
     case ir_lod:
     case ir_tg4:
+    case ir_texture_samples:
        break;
     }
  
@@@ -2898,14 -2899,14 +2900,14 @@@ vec4_visitor::emit_gen6_gather_wa(uint8
  uint32_t
  vec4_visitor::gather_channel(unsigned gather_component, uint32_t sampler)
  {
-    int swiz = GET_SWZ(key->tex.swizzles[sampler], gather_component);
+    int swiz = GET_SWZ(key_tex->swizzles[sampler], gather_component);
     switch (swiz) {
        case SWIZZLE_X: return 0;
        case SWIZZLE_Y:
           /* gather4 sampler is broken for green channel on RG32F --
            * we must ask for blue instead.
            */
-          if (key->tex.gather_channel_quirk_mask & (1<<sampler))
+          if (key_tex->gather_channel_quirk_mask & (1 << sampler))
              return 2;
           return 1;
        case SWIZZLE_Z: return 2;
@@@ -2920,7 -2921,7 +2922,7 @@@ vec4_visitor::swizzle_result(ir_texture
                               src_reg orig_val, uint32_t sampler,
                               const glsl_type *dest_type)
  {
-    int s = key->tex.swizzles[sampler];
+    int s = key_tex->swizzles[sampler];
  
     dst_reg swizzled_result = dest;
  
@@@ -3122,7 -3123,8 +3124,8 @@@ vec4_visitor::emit_psiz_and_flags(dst_r
  {
     if (devinfo->gen < 6 &&
         ((prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ) ||
-         key->userclip_active || devinfo->has_negative_rhw_bug)) {
+         output_reg[VARYING_SLOT_CLIP_DIST0].file != BAD_FILE ||
+         devinfo->has_negative_rhw_bug)) {
        dst_reg header1 = dst_reg(this, glsl_type::uvec4_type);
        dst_reg header1_w = header1;
        header1_w.writemask = WRITEMASK_W;
         emit(AND(header1_w, src_reg(header1_w), 0x7ff << 8));
        }
  
-       if (key->userclip_active) {
+       if (output_reg[VARYING_SLOT_CLIP_DIST0].file != BAD_FILE) {
           current_annotation = "Clipping flags";
           dst_reg flags0 = dst_reg(this, glsl_type::uint_type);
           dst_reg flags1 = dst_reg(this, glsl_type::uint_type);
     }
  }
  
- void
- vec4_visitor::emit_clip_distances(dst_reg reg, int offset)
- {
-    /* From the GLSL 1.30 spec, section 7.1 (Vertex Shader Special Variables):
-     *
-     *     "If a linked set of shaders forming the vertex stage contains no
-     *     static write to gl_ClipVertex or gl_ClipDistance, but the
-     *     application has requested clipping against user clip planes through
-     *     the API, then the coordinate written to gl_Position is used for
-     *     comparison against the user clip planes."
-     *
-     * This function is only called if the shader didn't write to
-     * gl_ClipDistance.  Accordingly, we use gl_ClipVertex to perform clipping
-     * if the user wrote to it; otherwise we use gl_Position.
-     */
-    gl_varying_slot clip_vertex = VARYING_SLOT_CLIP_VERTEX;
-    if (!(prog_data->vue_map.slots_valid & VARYING_BIT_CLIP_VERTEX)) {
-       clip_vertex = VARYING_SLOT_POS;
-    }
-    for (int i = 0; i + offset < key->nr_userclip_plane_consts && i < 4;
-         ++i) {
-       reg.writemask = 1 << i;
-       emit(DP4(reg,
-                src_reg(output_reg[clip_vertex]),
-                src_reg(this->userplane[i + offset])));
-    }
- }
  vec4_instruction *
  vec4_visitor::emit_generic_urb_slot(dst_reg reg, int varying)
  {
@@@ -3278,21 -3251,6 +3252,6 @@@ vec4_visitor::emit_urb_slot(dst_reg reg
     case BRW_VARYING_SLOT_PAD:
        /* No need to write to this slot */
        break;
-    case VARYING_SLOT_COL0:
-    case VARYING_SLOT_COL1:
-    case VARYING_SLOT_BFC0:
-    case VARYING_SLOT_BFC1: {
-       /* These built-in varyings are only supported in compatibility mode,
-        * and we only support GS in core profile.  So, this must be a vertex
-        * shader.
-        */
-       assert(stage == MESA_SHADER_VERTEX);
-       vec4_instruction *inst = emit_generic_urb_slot(reg, varying);
-       if (((struct brw_vs_prog_key *) key)->clamp_vertex_color)
-          inst->saturate = true;
-       break;
-    }
     default:
        emit_generic_urb_slot(reg, varying);
        break;
@@@ -3337,7 -3295,7 +3296,7 @@@ vec4_visitor::emit_vertex(
      * may need to unspill a register or load from an array.  Those
      * reads would use MRFs 14-15.
      */
-    int max_usable_mrf = 13;
+    int max_usable_mrf = FIRST_SPILL_MRF(devinfo->gen);
  
     /* The following assertion verifies that max_usable_mrf causes an
      * even-numbered amount of URB write data, which will meet gen6's
        emit_ndc_computation();
     }
  
-    /* Lower legacy ff and ClipVertex clipping to clip distances */
-    if (key->userclip_active && !prog->UsesClipDistanceOut) {
-       current_annotation = "user clip distances";
-       output_reg[VARYING_SLOT_CLIP_DIST0] = dst_reg(this, glsl_type::vec4_type);
-       output_reg[VARYING_SLOT_CLIP_DIST1] = dst_reg(this, glsl_type::vec4_type);
-       emit_clip_distances(output_reg[VARYING_SLOT_CLIP_DIST0], 0);
-       emit_clip_distances(output_reg[VARYING_SLOT_CLIP_DIST1], 4);
-    }
     /* We may need to split this up into several URB writes, so do them in a
      * loop.
      */
                         prog_data->vue_map.slot_to_varying[slot]);
  
           /* If this was max_usable_mrf, we can't fit anything more into this
-           * URB WRITE.
+           * URB WRITE. Same thing if we reached the maximum length available.
            */
-          if (mrf > max_usable_mrf) {
+          if (mrf > max_usable_mrf ||
+              align_interleaved_urb_mlen(devinfo, mrf - base_mrf + 1) > BRW_MAX_MSG_LENGTH) {
              slot++;
              break;
           }
@@@ -3763,7 -3711,7 +3712,7 @@@ vec4_visitor::resolve_bool_comparison(i
  vec4_visitor::vec4_visitor(const struct brw_compiler *compiler,
                             void *log_data,
                             struct gl_program *prog,
-                            const struct brw_vue_prog_key *key,
+                            const struct brw_sampler_prog_key_data *key_tex,
                             struct brw_vue_prog_data *prog_data,
                           struct gl_shader_program *shader_prog,
                             gl_shader_stage stage,
                             int shader_time_index)
     : backend_shader(compiler, log_data, mem_ctx,
                      shader_prog, prog, &prog_data->base, stage),
-      key(key),
+      key_tex(key_tex),
       prog_data(prog_data),
       sanity_param_count(0),
       fail_msg(NULL),
index 41266f57560cb660accb37f29da6008ff745be67,35c0908324ffadda1e69e3f0005e08dd2d49e9ce..1faf2eaa346cd9e1ae32d1098bba97260eaa8481
@@@ -26,6 -26,7 +26,7 @@@
  #include "brw_context.h"
  #include "brw_wm.h"
  #include "brw_state.h"
+ #include "brw_shader.h"
  #include "main/enums.h"
  #include "main/formats.h"
  #include "main/fbobject.h"
@@@ -41,7 -42,7 +42,7 @@@
   * Return a bitfield where bit n is set if barycentric interpolation mode n
   * (see enum brw_wm_barycentric_interp_mode) is needed by the fragment shader.
   */
 -static unsigned
 +unsigned
  brw_compute_barycentric_interp_modes(struct brw_context *brw,
                                       bool shade_model_flat,
                                       bool persample_shading,
@@@ -164,11 -165,13 +165,13 @@@ brw_codegen_wm_prog(struct brw_context 
     void *mem_ctx = ralloc_context(NULL);
     struct brw_wm_prog_data prog_data;
     const GLuint *program;
-    struct gl_shader *fs = NULL;
+    struct brw_shader *fs = NULL;
     GLuint program_size;
+    bool start_busy = false;
+    double start_time = 0;
  
     if (prog)
-       fs = prog->_LinkedShaders[MESA_SHADER_FRAGMENT];
+       fs = (struct brw_shader *)prog->_LinkedShaders[MESA_SHADER_FRAGMENT];
  
     memset(&prog_data, 0, sizeof(prog_data));
     /* key->alpha_test_func means simulating alpha testing via discards,
        fp->program.Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK);
     prog_data.computed_depth_mode = computed_depth_mode(&fp->program);
  
-    prog_data.early_fragment_tests = fs && fs->EarlyFragmentTests;
+    prog_data.early_fragment_tests = fs && fs->base.EarlyFragmentTests;
  
     /* Use ALT floating point mode for ARB programs so that 0^0 == 1. */
     if (!prog)
      */
     int param_count;
     if (fs) {
-       param_count = fs->num_uniform_components +
-                     fs->NumImages * BRW_IMAGE_PARAM_SIZE;
-       prog_data.base.nr_image_params = fs->NumImages;
+       param_count = fs->base.num_uniform_components +
+                     fs->base.NumImages * BRW_IMAGE_PARAM_SIZE;
+       prog_data.base.nr_image_params = fs->base.NumImages;
     } else {
        param_count = fp->program.Base.Parameters->NumParameters * 4;
     }
                                             key->persample_shading,
                                             &fp->program);
  
+    if (unlikely(brw->perf_debug)) {
+       start_busy = (brw->batch.last_bo &&
+                     drm_intel_bo_busy(brw->batch.last_bo));
+       start_time = get_time();
+    }
     program = brw_wm_fs_emit(brw, mem_ctx, key, &prog_data,
                              &fp->program, prog, &program_size);
     if (program == NULL) {
        return false;
     }
  
+    if (unlikely(brw->perf_debug) && fs) {
+       if (fs->compiled_once)
+          brw_wm_debug_recompile(brw, prog, key);
+       fs->compiled_once = true;
+       if (start_busy && !drm_intel_bo_busy(brw->batch.last_bo)) {
+          perf_debug("FS compile took %.03f ms and stalled the GPU\n",
+                     (get_time() - start_time) * 1000);
+       }
+    }
     if (prog_data.base.total_scratch) {
        brw_get_scratch_bo(brw, &brw->wm.base.scratch_bo,
                         prog_data.base.total_scratch * brw->max_wm_threads);
@@@ -642,3 -662,61 +662,61 @@@ brw_upload_wm_prog(struct brw_context *
     }
     brw->wm.base.prog_data = &brw->wm.prog_data->base;
  }
+ bool
+ brw_fs_precompile(struct gl_context *ctx,
+                   struct gl_shader_program *shader_prog,
+                   struct gl_program *prog)
+ {
+    struct brw_context *brw = brw_context(ctx);
+    struct brw_wm_prog_key key;
+    struct gl_fragment_program *fp = (struct gl_fragment_program *) prog;
+    struct brw_fragment_program *bfp = brw_fragment_program(fp);
+    bool program_uses_dfdy = fp->UsesDFdy;
+    memset(&key, 0, sizeof(key));
+    if (brw->gen < 6) {
+       if (fp->UsesKill)
+          key.iz_lookup |= IZ_PS_KILL_ALPHATEST_BIT;
+       if (fp->Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH))
+          key.iz_lookup |= IZ_PS_COMPUTES_DEPTH_BIT;
+       /* Just assume depth testing. */
+       key.iz_lookup |= IZ_DEPTH_TEST_ENABLE_BIT;
+       key.iz_lookup |= IZ_DEPTH_WRITE_ENABLE_BIT;
+    }
+    if (brw->gen < 6 || _mesa_bitcount_64(fp->Base.InputsRead &
+                                          BRW_FS_VARYING_INPUT_MASK) > 16)
+       key.input_slots_valid = fp->Base.InputsRead | VARYING_BIT_POS;
+    brw_setup_tex_for_precompile(brw, &key.tex, &fp->Base);
+    if (fp->Base.InputsRead & VARYING_BIT_POS) {
+       key.drawable_height = ctx->DrawBuffer->Height;
+    }
+    key.nr_color_regions = _mesa_bitcount_64(fp->Base.OutputsWritten &
+          ~(BITFIELD64_BIT(FRAG_RESULT_DEPTH) |
+          BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK)));
+    if ((fp->Base.InputsRead & VARYING_BIT_POS) || program_uses_dfdy) {
+       key.render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer) ||
+                           key.nr_color_regions > 1;
+    }
+    key.program_string_id = bfp->id;
+    uint32_t old_prog_offset = brw->wm.base.prog_offset;
+    struct brw_wm_prog_data *old_prog_data = brw->wm.prog_data;
+    bool success = brw_codegen_wm_prog(brw, shader_prog, bfp, &key);
+    brw->wm.base.prog_offset = old_prog_offset;
+    brw->wm.prog_data = old_prog_data;
+    return success;
+ }
diff --combined src/mesa/main/mtypes.h
index 4e88494c387ab7bac542571dc7f7611717aa96fa,288d75790a496a93d4b5160e35503b6daf710e4a..cbfb15522f0b4e1aae08825d99f42592e73c2bac
@@@ -944,6 -944,7 +944,7 @@@ typedef enu
   */
  struct gl_sampler_object
  {
+    mtx_t Mutex;
     GLuint Name;
     GLint RefCount;
     GLchar *Label;               /**< GL_KHR_debug */
@@@ -1887,6 -1888,7 +1888,7 @@@ enum gl_frag_depth_layou
   */
  struct gl_program
  {
+    mtx_t Mutex;
     GLuint Id;
     GLint RefCount;
     GLubyte *String;  /**< Null-terminated program text */
@@@ -2292,6 -2294,7 +2294,7 @@@ struct gl_shade
     struct gl_uniform_block *UniformBlocks;
  
     struct exec_list *ir;
+    struct exec_list *packed_varyings;
     struct glsl_symbol_table *symbols;
  
     bool uses_builtin_functions;
@@@ -2453,7 -2456,8 +2456,8 @@@ enum gl_uniform_block_packin
  {
     ubo_packing_std140,
     ubo_packing_shared,
-    ubo_packing_packed
+    ubo_packing_packed,
+    ubo_packing_std430
  };
  
  
@@@ -2473,11 -2477,6 +2477,11 @@@ struct gl_uniform_bloc
      */
     GLuint Binding;
  
 +   /**
 +    * Vulkan descriptor set qualifier for this block.
 +    */
 +   GLuint Set;
 +
     /**
      * Minimum size (in bytes) of a buffer object to back this uniform buffer
      * (GL_UNIFORM_BLOCK_DATA_SIZE).
@@@ -2690,7 -2689,7 +2694,7 @@@ struct gl_shader_progra
      */
     unsigned LastClipDistanceArraySize;
  
-    unsigned NumUniformBlocks;
+    unsigned NumBufferInterfaceBlocks;
     struct gl_uniform_block *UniformBlocks;
  
     /**
@@@ -3663,6 -3662,7 +3667,7 @@@ struct gl_extension
     GLboolean ARB_shader_stencil_export;
     GLboolean ARB_shader_storage_buffer_object;
     GLboolean ARB_shader_subroutine;
+    GLboolean ARB_shader_texture_image_samples;
     GLboolean ARB_shader_texture_lod;
     GLboolean ARB_shading_language_packing;
     GLboolean ARB_shading_language_420pack;
@@@ -4292,6 -4292,7 +4297,7 @@@ struct gl_contex
     struct gl_perf_monitor_state PerfMonitor;
  
     struct gl_buffer_object *DrawIndirectBuffer; /** < GL_ARB_draw_indirect */
+    struct gl_buffer_object *DispatchIndirectBuffer; /** < GL_ARB_compute_shader */
  
     struct gl_buffer_object *CopyReadBuffer; /**< GL_ARB_copy_buffer */
     struct gl_buffer_object *CopyWriteBuffer; /**< GL_ARB_copy_buffer */
index b8b082e2a5905041761d5d91bd392f691c81041d,4201a80cf620440e6683f0bb7d518fb2cb3a70b0..1cfcf9119f97c2508219a0fc7508b826a05d1b92
@@@ -543,7 -543,6 +543,7 @@@ type_size(const struct glsl_type *type
     case GLSL_TYPE_VOID:
     case GLSL_TYPE_ERROR:
     case GLSL_TYPE_INTERFACE:
 +   case GLSL_TYPE_FUNCTION:
        assert(!"Invalid type in type_size");
        break;
     }
@@@ -1345,9 -1344,11 +1345,11 @@@ ir_to_mesa_visitor::visit(ir_expressio
     case ir_unop_dFdy_coarse:
     case ir_unop_dFdy_fine:
     case ir_unop_subroutine_to_int:
+    case ir_unop_get_buffer_size:
        assert(!"not supported");
        break;
  
+    case ir_unop_ssbo_unsized_array_length:
     case ir_quadop_vector:
        /* This operation should have already been handled.
         */
@@@ -1920,6 -1921,8 +1922,8 @@@ ir_to_mesa_visitor::visit(ir_texture *i
     case ir_query_levels:
        assert(!"Unexpected ir_query_levels opcode");
        break;
+    case ir_texture_samples:
+       unreachable("Unexpected ir_texture_samples opcode");
     }
  
     const glsl_type *sampler_type = ir->sampler->type;
@@@ -2464,7 -2467,6 +2468,7 @@@ _mesa_associate_uniform_storage(struct 
           case GLSL_TYPE_STRUCT:
           case GLSL_TYPE_ERROR:
           case GLSL_TYPE_INTERFACE:
 +         case GLSL_TYPE_FUNCTION:
            assert(!"Should not get here.");
            break;
         }
@@@ -2981,7 -2983,7 +2985,7 @@@ _mesa_glsl_link_shader(struct gl_contex
        if (!ctx->Driver.LinkShader(ctx, prog)) {
         prog->LinkStatus = GL_FALSE;
        } else {
-          build_program_resource_list(ctx, prog);
+          build_program_resource_list(prog);
        }
     }
  
index dcd5581f9574003cc3e1e1cd23bd6797c35559d2,0000000000000000000000000000000000000000..8acfc068e90ac3f0a4f993c228a38c40b89fa88a
mode 100644,000000..100644
--- /dev/null
@@@ -1,1128 -1,0 +1,1120 @@@
-    key->base.program_string_id = vp->id;
-    brw_setup_vue_key_clip_info(brw, &key->base,
-                                vp->program.Base.UsesClipDistanceOut);
 +/*
 + * Copyright © 2015 Intel Corporation
 + *
 + * Permission is hereby granted, free of charge, to any person obtaining a
 + * copy of this software and associated documentation files (the "Software"),
 + * to deal in the Software without restriction, including without limitation
 + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 + * and/or sell copies of the Software, and to permit persons to whom the
 + * Software is furnished to do so, subject to the following conditions:
 + *
 + * The above copyright notice and this permission notice (including the next
 + * paragraph) shall be included in all copies or substantial portions of the
 + * Software.
 + *
 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 + * DEALINGS IN THE SOFTWARE.
 + */
 +
 +#include <sys/stat.h>
 +#include <unistd.h>
 +#include <fcntl.h>
 +
 +#include "anv_private.h"
 +#include "anv_nir.h"
 +
 +#include <brw_context.h>
 +#include <brw_wm.h> /* brw_new_shader_program is here */
 +#include <brw_nir.h>
 +
 +#include <brw_vs.h>
 +#include <brw_gs.h>
 +#include <brw_cs.h>
 +
 +#include <mesa/main/shaderobj.h>
 +#include <mesa/main/fbobject.h>
 +#include <mesa/main/context.h>
 +#include <mesa/program/program.h>
 +#include <glsl/program.h>
 +
 +/* XXX: We need this to keep symbols in nir.h from conflicting with the
 + * generated GEN command packing headers.  We need to fix *both* to not
 + * define something as generic as LOAD.
 + */
 +#undef LOAD
 +
 +#include <glsl/nir/nir_spirv.h>
 +
 +#define SPIR_V_MAGIC_NUMBER 0x07230203
 +
 +static void
 +fail_if(int cond, const char *format, ...)
 +{
 +   va_list args;
 +
 +   if (!cond)
 +      return;
 +
 +   va_start(args, format);
 +   vfprintf(stderr, format, args);
 +   va_end(args);
 +
 +   exit(1);
 +}
 +
 +static VkResult
 +set_binding_table_layout(struct brw_stage_prog_data *prog_data,
 +                         struct anv_pipeline *pipeline, uint32_t stage)
 +{
 +   uint32_t bias, count, k, *map;
 +   struct anv_pipeline_layout *layout = pipeline->layout;
 +
 +   /* Having no layout is valid for shaders that don't bind any resources. */
 +   if (pipeline->layout == NULL)
 +      return VK_SUCCESS;
 +
 +   if (stage == VK_SHADER_STAGE_FRAGMENT)
 +      bias = MAX_RTS;
 +   else
 +      bias = 0;
 +
 +   count = layout->stage[stage].surface_count;
 +   prog_data->map_entries =
 +      (uint32_t *) malloc(count * sizeof(prog_data->map_entries[0]));
 +   if (prog_data->map_entries == NULL)
 +      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
 +
 +   k = bias;
 +   map = prog_data->map_entries;
 +   for (uint32_t i = 0; i < layout->num_sets; i++) {
 +      prog_data->bind_map[i].index = map;
 +      for (uint32_t j = 0; j < layout->set[i].layout->stage[stage].surface_count; j++)
 +         *map++ = k++;
 +
 +      prog_data->bind_map[i].index_count =
 +         layout->set[i].layout->stage[stage].surface_count;
 +   }
 +
 +   return VK_SUCCESS;
 +}
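To make the flattening above concrete, here is a minimal standalone sketch with made-up counts (no Vulkan or Mesa dependencies): a bias of 8 render-target slots, as used for the fragment stage, followed by two descriptor sets with 3 and 2 surfaces.

    #include <stdio.h>

    int main(void)
    {
       /* Hypothetical numbers: the binding table reserves `bias` slots for
        * render targets, then packs the surfaces of each set densely.
        */
       const unsigned bias = 8;
       const unsigned surface_count[2] = { 3, 2 };
       unsigned k = bias;

       for (unsigned set = 0; set < 2; set++) {
          printf("set %u ->", set);
          for (unsigned binding = 0; binding < surface_count[set]; binding++)
             printf(" %u", k++);   /* binding table index for (set, binding) */
          printf("\n");
       }
       return 0;                   /* prints: set 0 -> 8 9 10, set 1 -> 11 12 */
    }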
 +
 +static uint32_t
 +upload_kernel(struct anv_pipeline *pipeline, const void *data, size_t size)
 +{
 +   struct anv_state state =
 +      anv_state_stream_alloc(&pipeline->program_stream, size, 64);
 +
 +   assert(size < pipeline->program_stream.block_pool->block_size);
 +
 +   memcpy(state.map, data, size);
 +
 +   return state.offset;
 +}
 +
 +static void
 +create_params_array(struct anv_pipeline *pipeline,
 +                    struct gl_shader *shader,
 +                    struct brw_stage_prog_data *prog_data)
 +{
 +   VkShaderStage stage = anv_vk_shader_stage_for_mesa_stage(shader->Stage);
 +   unsigned num_params = 0;
 +
 +   if (shader->num_uniform_components) {
 +      /* If the shader uses any push constants at all, we'll just give
 +       * them the maximum possible number
 +       */
 +      num_params += MAX_PUSH_CONSTANTS_SIZE / sizeof(float);
 +   }
 +
 +   if (pipeline->layout && pipeline->layout->stage[stage].has_dynamic_offsets)
 +      num_params += MAX_DYNAMIC_BUFFERS;
 +
 +   if (num_params == 0)
 +      return;
 +
 +   prog_data->param = (const gl_constant_value **)
 +      anv_device_alloc(pipeline->device,
 +                       num_params * sizeof(gl_constant_value *),
 +                       8, VK_SYSTEM_ALLOC_TYPE_INTERNAL_SHADER);
 +
 +   /* We now set the param values to be offsets into an
 +    * anv_push_constants structure.  Since the compiler doesn't
 +    * actually dereference any of the gl_constant_value pointers in the
 +    * params array, it doesn't really matter what we put here.
 +    */
 +   struct anv_push_constants *null_data = NULL;
 +   for (unsigned i = 0; i < num_params; i++)
 +      prog_data->param[i] =
 +         (const gl_constant_value *)&null_data->client_data[i * sizeof(float)];
 +}
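A minimal sketch of the consumer side of the null-pointer trick described above, assuming the client data has already been copied into a real push constant buffer; the helper name and exact layout are illustrative, not taken from this patch.

    #include <stdint.h>

    /* Because param[i] was set to &null_data->client_data[i * sizeof(float)]
     * with null_data == NULL, the stored pointer value is just the byte
     * offset of that constant within struct anv_push_constants.
     */
    static float
    fetch_push_constant(const void *push_constant_buffer, const void *param_entry)
    {
       uintptr_t offset = (uintptr_t)param_entry;
       return *(const float *)((const char *)push_constant_buffer + offset);
    }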
 +
 +static void
 +brw_vs_populate_key(struct brw_context *brw,
 +                    struct brw_vertex_program *vp,
 +                    struct brw_vs_prog_key *key)
 +{
 +   struct gl_context *ctx = &brw->ctx;
 +   /* BRW_NEW_VERTEX_PROGRAM */
 +   struct gl_program *prog = (struct gl_program *) vp;
 +
 +   memset(key, 0, sizeof(*key));
 +
 +   /* Just upload the program verbatim for now.  Always send it all
 +    * the inputs it asks for, whether they are varying or not.
 +    */
-                                       &key->base.tex);
++   key->program_string_id = vp->id;
 +
 +   /* _NEW_POLYGON */
 +   if (brw->gen < 6) {
 +      key->copy_edgeflag = (ctx->Polygon.FrontMode != GL_FILL ||
 +                           ctx->Polygon.BackMode != GL_FILL);
 +   }
 +
 +   if (prog->OutputsWritten & (VARYING_BIT_COL0 | VARYING_BIT_COL1 |
 +                               VARYING_BIT_BFC0 | VARYING_BIT_BFC1)) {
 +      /* _NEW_LIGHT | _NEW_BUFFERS */
 +      key->clamp_vertex_color = ctx->Light._ClampVertexColor;
 +   }
 +
 +   /* _NEW_POINT */
 +   if (brw->gen < 6 && ctx->Point.PointSprite) {
 +      for (int i = 0; i < 8; i++) {
 +         if (ctx->Point.CoordReplace[i])
 +            key->point_coord_replace |= (1 << i);
 +      }
 +   }
 +
 +   /* _NEW_TEXTURE */
 +   brw_populate_sampler_prog_key_data(ctx, prog, brw->vs.base.sampler_count,
-    if (key->base.userclip_active) {
++                                      &key->tex);
 +}
 +
 +static bool
 +really_do_vs_prog(struct brw_context *brw,
 +                  struct gl_shader_program *prog,
 +                  struct brw_vertex_program *vp,
 +                  struct brw_vs_prog_key *key, struct anv_pipeline *pipeline)
 +{
 +   GLuint program_size;
 +   const GLuint *program;
 +   struct brw_vs_prog_data *prog_data = &pipeline->vs_prog_data;
 +   void *mem_ctx;
 +   struct gl_shader *vs = NULL;
 +
 +   if (prog)
 +      vs = prog->_LinkedShaders[MESA_SHADER_VERTEX];
 +
 +   memset(prog_data, 0, sizeof(*prog_data));
 +
 +   mem_ctx = ralloc_context(NULL);
 +
 +   create_params_array(pipeline, vs, &prog_data->base.base);
 +   anv_nir_apply_dynamic_offsets(pipeline, vs->Program->nir,
 +                                 &prog_data->base.base);
 +
 +   GLbitfield64 outputs_written = vp->program.Base.OutputsWritten;
 +   prog_data->inputs_read = vp->program.Base.InputsRead;
 +
 +   if (key->copy_edgeflag) {
 +      outputs_written |= BITFIELD64_BIT(VARYING_SLOT_EDGE);
 +      prog_data->inputs_read |= VERT_BIT_EDGEFLAG;
 +   }
 +
 +   if (brw->gen < 6) {
 +      /* Put dummy slots into the VUE for the SF to put the replaced
 +       * point sprite coords in.  We shouldn't need these dummy slots,
 +       * which take up precious URB space, but it would mean that the SF
 +       * doesn't get nice aligned pairs of input coords into output
 +       * coords, which would be a pain to handle.
 +       */
 +      for (int i = 0; i < 8; i++) {
 +         if (key->point_coord_replace & (1 << i))
 +            outputs_written |= BITFIELD64_BIT(VARYING_SLOT_TEX0 + i);
 +      }
 +
 +      /* if back colors are written, allocate slots for front colors too */
 +      if (outputs_written & BITFIELD64_BIT(VARYING_SLOT_BFC0))
 +         outputs_written |= BITFIELD64_BIT(VARYING_SLOT_COL0);
 +      if (outputs_written & BITFIELD64_BIT(VARYING_SLOT_BFC1))
 +         outputs_written |= BITFIELD64_BIT(VARYING_SLOT_COL1);
 +   }
 +
 +   /* In order for legacy clipping to work, we need to populate the clip
 +    * distance varying slots whenever clipping is enabled, even if the vertex
 +    * shader doesn't write to gl_ClipDistance.
 +    */
-                        &prog_data->base.vue_map, outputs_written);
- \
++   if (key->nr_userclip_plane_consts) {
 +      outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0);
 +      outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST1);
 +   }
 +
 +   brw_compute_vue_map(brw->intelScreen->devinfo,
-    key->base.program_string_id = gp->id;
-    brw_setup_vue_key_clip_info(brw, &key->base,
-                                gp->program.Base.UsesClipDistanceOut);
++                       &prog_data->base.vue_map, outputs_written,
++                       prog ? prog->SeparateShader : false);
++
 +   set_binding_table_layout(&prog_data->base.base, pipeline,
 +                            VK_SHADER_STAGE_VERTEX);
 +
 +   /* Emit GEN4 code.
 +    */
 +   program = brw_vs_emit(brw, mem_ctx, key, prog_data, &vp->program,
 +                         prog, &program_size);
 +   if (program == NULL) {
 +      ralloc_free(mem_ctx);
 +      return false;
 +   }
 +
 +   const uint32_t offset = upload_kernel(pipeline, program, program_size);
 +   if (prog_data->base.dispatch_mode == DISPATCH_MODE_SIMD8) {
 +      pipeline->vs_simd8 = offset;
 +      pipeline->vs_vec4 = NO_KERNEL;
 +   } else {
 +      pipeline->vs_simd8 = NO_KERNEL;
 +      pipeline->vs_vec4 = offset;
 +   }
 +
 +   ralloc_free(mem_ctx);
 +
 +   return true;
 +}
 +
 +void brw_wm_populate_key(struct brw_context *brw,
 +                         struct brw_fragment_program *fp,
 +                         struct brw_wm_prog_key *key)
 +{
 +   struct gl_context *ctx = &brw->ctx;
 +   struct gl_program *prog = (struct gl_program *) brw->fragment_program;
 +   GLuint lookup = 0;
 +   GLuint line_aa;
 +   bool program_uses_dfdy = fp->program.UsesDFdy;
 +   struct gl_framebuffer draw_buffer;
 +   bool multisample_fbo;
 +
 +   memset(key, 0, sizeof(*key));
 +
 +   for (int i = 0; i < MAX_SAMPLERS; i++) {
 +      /* Assume color sampler, no swizzling. */
 +      key->tex.swizzles[i] = SWIZZLE_XYZW;
 +   }
 +
 +   /* A non-zero framebuffer name indicates that the framebuffer was created by
 +    * the user rather than the window system. */
 +   draw_buffer.Name = 1;
 +   draw_buffer.Visual.samples = 1;
 +   draw_buffer._NumColorDrawBuffers = 1;
 +   draw_buffer.Width = 400;
 +   draw_buffer.Height = 400;
 +   ctx->DrawBuffer = &draw_buffer;
 +
 +   multisample_fbo = ctx->DrawBuffer->Visual.samples > 1;
 +
 +   /* Build the index for table lookup
 +    */
 +   if (brw->gen < 6) {
 +      /* _NEW_COLOR */
 +      if (fp->program.UsesKill || ctx->Color.AlphaEnabled)
 +         lookup |= IZ_PS_KILL_ALPHATEST_BIT;
 +
 +      if (fp->program.Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH))
 +         lookup |= IZ_PS_COMPUTES_DEPTH_BIT;
 +
 +      /* _NEW_DEPTH */
 +      if (ctx->Depth.Test)
 +         lookup |= IZ_DEPTH_TEST_ENABLE_BIT;
 +
 +      if (ctx->Depth.Test && ctx->Depth.Mask) /* ?? */
 +         lookup |= IZ_DEPTH_WRITE_ENABLE_BIT;
 +
 +      /* _NEW_STENCIL | _NEW_BUFFERS */
 +      if (ctx->Stencil._Enabled) {
 +         lookup |= IZ_STENCIL_TEST_ENABLE_BIT;
 +
 +         if (ctx->Stencil.WriteMask[0] ||
 +             ctx->Stencil.WriteMask[ctx->Stencil._BackFace])
 +            lookup |= IZ_STENCIL_WRITE_ENABLE_BIT;
 +      }
 +      key->iz_lookup = lookup;
 +   }
 +
 +   line_aa = AA_NEVER;
 +
 +   /* _NEW_LINE, _NEW_POLYGON, BRW_NEW_REDUCED_PRIMITIVE */
 +   if (ctx->Line.SmoothFlag) {
 +      if (brw->reduced_primitive == GL_LINES) {
 +         line_aa = AA_ALWAYS;
 +      }
 +      else if (brw->reduced_primitive == GL_TRIANGLES) {
 +         if (ctx->Polygon.FrontMode == GL_LINE) {
 +            line_aa = AA_SOMETIMES;
 +
 +            if (ctx->Polygon.BackMode == GL_LINE ||
 +                (ctx->Polygon.CullFlag &&
 +                 ctx->Polygon.CullFaceMode == GL_BACK))
 +               line_aa = AA_ALWAYS;
 +         }
 +         else if (ctx->Polygon.BackMode == GL_LINE) {
 +            line_aa = AA_SOMETIMES;
 +
 +            if ((ctx->Polygon.CullFlag &&
 +                 ctx->Polygon.CullFaceMode == GL_FRONT))
 +               line_aa = AA_ALWAYS;
 +         }
 +      }
 +   }
 +
 +   key->line_aa = line_aa;
 +
 +   /* _NEW_HINT */
 +   key->high_quality_derivatives =
 +      ctx->Hint.FragmentShaderDerivative == GL_NICEST;
 +
 +   if (brw->gen < 6)
 +      key->stats_wm = brw->stats_wm;
 +
 +   /* _NEW_LIGHT */
 +   key->flat_shade = (ctx->Light.ShadeModel == GL_FLAT);
 +
 +   /* _NEW_FRAG_CLAMP | _NEW_BUFFERS */
 +   key->clamp_fragment_color = ctx->Color._ClampFragmentColor;
 +
 +   /* _NEW_TEXTURE */
 +   brw_populate_sampler_prog_key_data(ctx, prog, brw->wm.base.sampler_count,
 +                                      &key->tex);
 +
 +   /* _NEW_BUFFERS */
 +   /*
 +    * Include the draw buffer origin and height so that we can calculate
 +    * fragment position values relative to the bottom left of the drawable,
 +    * from the incoming screen origin relative position we get as part of our
 +    * payload.
 +    *
 +    * This is only needed for the WM_WPOSXY opcode when the fragment program
 +    * uses the gl_FragCoord input.
 +    *
 +    * We could avoid recompiling by including this as a constant referenced by
 +    * our program, but if we were to do that it would also be nice to handle
 +    * getting that constant updated at batchbuffer submit time (when we
 +    * hold the lock and know where the buffer really is) rather than at emit
 +    * time when we don't hold the lock and are just guessing.  We could also
 +    * just avoid using this as key data if the program doesn't use
 +    * fragment.position.
 +    *
 +    * For DRI2 the origin_x/y will always be (0,0) but we still need the
 +    * drawable height in order to invert the Y axis.
 +    */
 +   if (fp->program.Base.InputsRead & VARYING_BIT_POS) {
 +      key->drawable_height = ctx->DrawBuffer->Height;
 +   }
 +
 +   if ((fp->program.Base.InputsRead & VARYING_BIT_POS) || program_uses_dfdy) {
 +      key->render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
 +   }
 +
 +   /* _NEW_BUFFERS */
 +   key->nr_color_regions = ctx->DrawBuffer->_NumColorDrawBuffers;
 +
 +   /* _NEW_MULTISAMPLE, _NEW_COLOR, _NEW_BUFFERS */
 +   key->replicate_alpha = ctx->DrawBuffer->_NumColorDrawBuffers > 1 &&
 +      (ctx->Multisample.SampleAlphaToCoverage || ctx->Color.AlphaEnabled);
 +
 +   /* _NEW_BUFFERS _NEW_MULTISAMPLE */
 +   /* Ignore sample qualifier while computing this flag. */
 +   key->persample_shading =
 +      _mesa_get_min_invocations_per_fragment(ctx, &fp->program, true) > 1;
 +   if (key->persample_shading)
 +      key->persample_2x = ctx->DrawBuffer->Visual.samples == 2;
 +
 +   key->compute_pos_offset =
 +      _mesa_get_min_invocations_per_fragment(ctx, &fp->program, false) > 1 &&
 +      fp->program.Base.SystemValuesRead & SYSTEM_BIT_SAMPLE_POS;
 +
 +   key->compute_sample_id =
 +      multisample_fbo &&
 +      ctx->Multisample.Enabled &&
 +      (fp->program.Base.SystemValuesRead & SYSTEM_BIT_SAMPLE_ID);
 +
 +   /* BRW_NEW_VUE_MAP_GEOM_OUT */
 +   if (brw->gen < 6 || _mesa_bitcount_64(fp->program.Base.InputsRead &
 +                                         BRW_FS_VARYING_INPUT_MASK) > 16)
 +      key->input_slots_valid = brw->vue_map_geom_out.slots_valid;
 +
 +   /* _NEW_COLOR | _NEW_BUFFERS */
 +   /* Pre-gen6, the hardware alpha test always used each render
 +    * target's alpha to do alpha test, as opposed to render target 0's alpha
 +    * like GL requires.  Fix that by building the alpha test into the
 +    * shader, and we'll skip enabling the fixed function alpha test.
 +    */
 +   if (brw->gen < 6 && ctx->DrawBuffer->_NumColorDrawBuffers > 1 &&
 +       ctx->Color.AlphaEnabled) {
 +      key->alpha_test_func = ctx->Color.AlphaFunc;
 +      key->alpha_test_ref = ctx->Color.AlphaRef;
 +   }
 +
 +   /* The unique fragment program ID */
 +   key->program_string_id = fp->id;
 +
 +   ctx->DrawBuffer = NULL;
 +}
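
The long comment above reduces to one flip of the fragment Y coordinate when drawing to a window-system drawable. A minimal standalone sketch of that flip, assuming a top-left screen origin; the function and its names are illustrative, not the driver's actual WM_WPOSXY lowering:

#include <stdio.h>

/* Convert a screen-origin (top-left) fragment Y to GL's bottom-left
 * origin.  User FBOs already use a bottom-left origin, so no flip. */
static float
flip_frag_y(float screen_y, float drawable_height, int render_to_fbo)
{
   if (render_to_fbo)
      return screen_y;
   return drawable_height - screen_y;
}

int
main(void)
{
   printf("%.1f\n", flip_frag_y(10.0f, 480.0f, 0));   /* 470.0 */
   printf("%.1f\n", flip_frag_y(10.0f, 480.0f, 1));   /* 10.0  */
   return 0;
}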
 +
 +static uint8_t
 +computed_depth_mode(struct gl_fragment_program *fp)
 +{
 +   if (fp->Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
 +      switch (fp->FragDepthLayout) {
 +      case FRAG_DEPTH_LAYOUT_NONE:
 +      case FRAG_DEPTH_LAYOUT_ANY:
 +         return BRW_PSCDEPTH_ON;
 +      case FRAG_DEPTH_LAYOUT_GREATER:
 +         return BRW_PSCDEPTH_ON_GE;
 +      case FRAG_DEPTH_LAYOUT_LESS:
 +         return BRW_PSCDEPTH_ON_LE;
 +      case FRAG_DEPTH_LAYOUT_UNCHANGED:
 +         return BRW_PSCDEPTH_OFF;
 +      }
 +   }
 +   return BRW_PSCDEPTH_OFF;
 +}
 +
 +static bool
 +really_do_wm_prog(struct brw_context *brw,
 +                  struct gl_shader_program *prog,
 +                  struct brw_fragment_program *fp,
 +                  struct brw_wm_prog_key *key, struct anv_pipeline *pipeline)
 +{
 +   void *mem_ctx = ralloc_context(NULL);
 +   struct brw_wm_prog_data *prog_data = &pipeline->wm_prog_data;
 +   struct gl_shader *fs = NULL;
 +   unsigned int program_size;
 +   const uint32_t *program;
 +
 +   if (prog)
 +      fs = prog->_LinkedShaders[MESA_SHADER_FRAGMENT];
 +
 +   memset(prog_data, 0, sizeof(*prog_data));
 +
 +   /* key->alpha_test_func means simulating alpha testing via discards,
 +    * so the shader definitely kills pixels.
 +    */
 +   prog_data->uses_kill = fp->program.UsesKill || key->alpha_test_func;
 +
 +   prog_data->computed_depth_mode = computed_depth_mode(&fp->program);
 +
 +   create_params_array(pipeline, fs, &prog_data->base);
 +   anv_nir_apply_dynamic_offsets(pipeline, fs->Program->nir, &prog_data->base);
 +
 +   prog_data->barycentric_interp_modes =
 +      brw_compute_barycentric_interp_modes(brw, key->flat_shade,
 +                                           key->persample_shading,
 +                                           &fp->program);
 +
 +   set_binding_table_layout(&prog_data->base, pipeline,
 +                            VK_SHADER_STAGE_FRAGMENT);
 +   /* This needs to come after shader time and pull constant entries, but we
 +    * don't have those set up now, so just put it after the layout entries.
 +    */
 +   prog_data->binding_table.render_target_start = 0;
 +
 +   program = brw_wm_fs_emit(brw, mem_ctx, key, prog_data,
 +                            &fp->program, prog, &program_size);
 +   if (program == NULL) {
 +      ralloc_free(mem_ctx);
 +      return false;
 +   }
 +
 +   uint32_t offset = upload_kernel(pipeline, program, program_size);
 +
 +   if (prog_data->no_8)
 +      pipeline->ps_simd8 = NO_KERNEL;
 +   else
 +      pipeline->ps_simd8 = offset;
 +
 +   if (prog_data->no_8 || prog_data->prog_offset_16) {
 +      pipeline->ps_simd16 = offset + prog_data->prog_offset_16;
 +   } else {
 +      pipeline->ps_simd16 = NO_KERNEL;
 +   }
 +
 +   ralloc_free(mem_ctx);
 +
 +   return true;
 +}
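
The SIMD8/SIMD16 bookkeeping at the end of really_do_wm_prog can be read as follows: the compiled blob starts with the SIMD8 program unless no_8 is set, and the SIMD16 program, if present, begins prog_offset_16 bytes in. This is a hedged restatement with invented names and an illustrative NO_KERNEL value, not driver code:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define NO_KERNEL (~0u)   /* sentinel; value chosen for illustration */

struct ps_kernels {
   uint32_t simd8;
   uint32_t simd16;
};

static struct ps_kernels
pick_ps_kernels(uint32_t base, bool no_8, uint32_t prog_offset_16)
{
   struct ps_kernels k;
   k.simd8  = no_8 ? NO_KERNEL : base;
   k.simd16 = (no_8 || prog_offset_16) ? base + prog_offset_16 : NO_KERNEL;
   return k;
}

int
main(void)
{
   struct ps_kernels k = pick_ps_kernels(4096, false, 512);
   printf("simd8 at %u, simd16 at %u\n", k.simd8, k.simd16);
   return 0;
}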
 +
 +static void
 +brw_gs_populate_key(struct brw_context *brw,
 +                    struct anv_pipeline *pipeline,
 +                    struct brw_geometry_program *gp,
 +                    struct brw_gs_prog_key *key)
 +{
 +   struct gl_context *ctx = &brw->ctx;
 +   struct brw_stage_state *stage_state = &brw->gs.base;
 +   struct gl_program *prog = &gp->program.Base;
 +
 +   memset(key, 0, sizeof(*key));
 +
-                                       &key->base.tex);
-    struct brw_vs_prog_data *prog_data = &pipeline->vs_prog_data;
-    /* BRW_NEW_VUE_MAP_VS */
-    key->input_varyings = prog_data->base.vue_map.slots_valid;
++   key->program_string_id = gp->id;
 +
 +   /* _NEW_TEXTURE */
 +   brw_populate_sampler_prog_key_data(ctx, prog, stage_state->sampler_count,
++                                      &key->tex);
 +}
 +
 +static bool
 +really_do_gs_prog(struct brw_context *brw,
 +                  struct gl_shader_program *prog,
 +                  struct brw_geometry_program *gp,
 +                  struct brw_gs_prog_key *key, struct anv_pipeline *pipeline)
 +{
 +   struct brw_gs_compile_output output;
 +
 +   /* FIXME: We pass the bind map to the compile in the output struct. Need
 +    * something better. */
 +   set_binding_table_layout(&output.prog_data.base.base,
 +                            pipeline, VK_SHADER_STAGE_GEOMETRY);
 +
 +   brw_compile_gs_prog(brw, prog, gp, key, &output);
 +
 +   pipeline->gs_vec4 = upload_kernel(pipeline, output.program, output.program_size);
 +   pipeline->gs_vertex_count = gp->program.VerticesIn;
 +
 +   ralloc_free(output.mem_ctx);
 +
 +   return true;
 +}
 +
 +static bool
 +brw_codegen_cs_prog(struct brw_context *brw,
 +                    struct gl_shader_program *prog,
 +                    struct brw_compute_program *cp,
 +                    struct brw_cs_prog_key *key, struct anv_pipeline *pipeline)
 +{
 +   const GLuint *program;
 +   void *mem_ctx = ralloc_context(NULL);
 +   GLuint program_size;
 +   struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data;
 +
 +   struct gl_shader *cs = prog->_LinkedShaders[MESA_SHADER_COMPUTE];
 +   assert(cs);
 +
 +   memset(prog_data, 0, sizeof(*prog_data));
 +
 +   set_binding_table_layout(&prog_data->base, pipeline, VK_SHADER_STAGE_COMPUTE);
 +
 +   create_params_array(pipeline, cs, &prog_data->base);
 +   anv_nir_apply_dynamic_offsets(pipeline, cs->Program->nir, &prog_data->base);
 +
 +   program = brw_cs_emit(brw, mem_ctx, key, prog_data,
 +                         &cp->program, prog, &program_size);
 +   if (program == NULL) {
 +      ralloc_free(mem_ctx);
 +      return false;
 +   }
 +
 +   if (unlikely(INTEL_DEBUG & DEBUG_CS))
 +      fprintf(stderr, "\n");
 +
 +   pipeline->cs_simd = upload_kernel(pipeline, program, program_size);
 +
 +   ralloc_free(mem_ctx);
 +
 +   return true;
 +}
 +
 +static void
 +brw_cs_populate_key(struct brw_context *brw,
 +                    struct brw_compute_program *bcp, struct brw_cs_prog_key *key)
 +{
 +   memset(key, 0, sizeof(*key));
 +
 +   /* The unique compute program ID */
 +   key->program_string_id = bcp->id;
 +}
 +
 +struct anv_compiler {
 +   struct anv_device *device;
 +   struct intel_screen *screen;
 +   struct brw_context *brw;
 +   struct gl_pipeline_object pipeline;
 +};
 +
 +extern "C" {
 +
 +struct anv_compiler *
 +anv_compiler_create(struct anv_device *device)
 +{
 +   const struct brw_device_info *devinfo = &device->info;
 +   struct anv_compiler *compiler;
 +   struct gl_context *ctx;
 +
 +   compiler = rzalloc(NULL, struct anv_compiler);
 +   if (compiler == NULL)
 +      return NULL;
 +
 +   compiler->screen = rzalloc(compiler, struct intel_screen);
 +   if (compiler->screen == NULL)
 +      goto fail;
 +
 +   compiler->brw = rzalloc(compiler, struct brw_context);
 +   if (compiler->brw == NULL)
 +      goto fail;
 +
 +   compiler->device = device;
 +
 +   compiler->brw->gen = devinfo->gen;
 +   compiler->brw->is_g4x = devinfo->is_g4x;
 +   compiler->brw->is_baytrail = devinfo->is_baytrail;
 +   compiler->brw->is_haswell = devinfo->is_haswell;
 +   compiler->brw->is_cherryview = devinfo->is_cherryview;
 +
 +   /* We need this at least for CS, which will check brw->max_cs_threads
 +    * against the work group size. */
 +   compiler->brw->max_vs_threads = devinfo->max_vs_threads;
 +   compiler->brw->max_hs_threads = devinfo->max_hs_threads;
 +   compiler->brw->max_ds_threads = devinfo->max_ds_threads;
 +   compiler->brw->max_gs_threads = devinfo->max_gs_threads;
 +   compiler->brw->max_wm_threads = devinfo->max_wm_threads;
 +   compiler->brw->max_cs_threads = devinfo->max_cs_threads;
 +   compiler->brw->urb.size = devinfo->urb.size;
 +   compiler->brw->urb.min_vs_entries = devinfo->urb.min_vs_entries;
 +   compiler->brw->urb.max_vs_entries = devinfo->urb.max_vs_entries;
 +   compiler->brw->urb.max_hs_entries = devinfo->urb.max_hs_entries;
 +   compiler->brw->urb.max_ds_entries = devinfo->urb.max_ds_entries;
 +   compiler->brw->urb.max_gs_entries = devinfo->urb.max_gs_entries;
 +
 +   compiler->brw->intelScreen = compiler->screen;
 +   compiler->screen->devinfo = &device->info;
 +
 +   brw_process_intel_debug_variable(compiler->screen);
 +
 +   compiler->screen->compiler = brw_compiler_create(compiler, &device->info);
 +
 +   ctx = &compiler->brw->ctx;
 +   _mesa_init_shader_object_functions(&ctx->Driver);
 +
 +   /* brw_select_clip_planes() needs this for bogus reasons. */
 +   ctx->_Shader = &compiler->pipeline;
 +
 +   return compiler;
 +
 + fail:
 +   ralloc_free(compiler);
 +   return NULL;
 +}
 +
 +void
 +anv_compiler_destroy(struct anv_compiler *compiler)
 +{
 +   _mesa_free_errors_data(&compiler->brw->ctx);
 +   ralloc_free(compiler);
 +}
 +
 +/* From gen7_urb.c */
 +
 +/* FIXME: Add to struct intel_device_info */
 +
 +static const int gen8_push_size = 32 * 1024;
 +
 +static void
 +gen7_compute_urb_partition(struct anv_pipeline *pipeline)
 +{
 +   const struct brw_device_info *devinfo = &pipeline->device->info;
 +   bool vs_present = pipeline->vs_simd8 != NO_KERNEL;
 +   unsigned vs_size = vs_present ? pipeline->vs_prog_data.base.urb_entry_size : 1;
 +   unsigned vs_entry_size_bytes = vs_size * 64;
 +   bool gs_present = pipeline->gs_vec4 != NO_KERNEL;
 +   unsigned gs_size = gs_present ? pipeline->gs_prog_data.base.urb_entry_size : 1;
 +   unsigned gs_entry_size_bytes = gs_size * 64;
 +
 +   /* From p35 of the Ivy Bridge PRM (section 1.7.1: 3DSTATE_URB_GS):
 +    *
 +    *     VS Number of URB Entries must be divisible by 8 if the VS URB Entry
 +    *     Allocation Size is less than 9 512-bit URB entries.
 +    *
 +    * Similar text exists for GS.
 +    */
 +   unsigned vs_granularity = (vs_size < 9) ? 8 : 1;
 +   unsigned gs_granularity = (gs_size < 9) ? 8 : 1;
 +
 +   /* URB allocations must be done in 8k chunks. */
 +   unsigned chunk_size_bytes = 8192;
 +
 +   /* Determine the size of the URB in chunks. */
 +   unsigned urb_chunks = devinfo->urb.size * 1024 / chunk_size_bytes;
 +
 +   /* Reserve space for push constants */
 +   unsigned push_constant_bytes = gen8_push_size;
 +   unsigned push_constant_chunks =
 +      push_constant_bytes / chunk_size_bytes;
 +
 +   /* Initially, assign each stage the minimum amount of URB space it needs,
 +    * and make a note of how much additional space it "wants" (the amount of
 +    * additional space it could actually make use of).
 +    */
 +
 +   /* VS has a lower limit on the number of URB entries */
 +   unsigned vs_chunks =
 +      ALIGN(devinfo->urb.min_vs_entries * vs_entry_size_bytes,
 +            chunk_size_bytes) / chunk_size_bytes;
 +   unsigned vs_wants =
 +      ALIGN(devinfo->urb.max_vs_entries * vs_entry_size_bytes,
 +            chunk_size_bytes) / chunk_size_bytes - vs_chunks;
 +
 +   unsigned gs_chunks = 0;
 +   unsigned gs_wants = 0;
 +   if (gs_present) {
 +      /* There are two constraints on the minimum amount of URB space we can
 +       * allocate:
 +       *
 +       * (1) We need room for at least 2 URB entries, since we always operate
 +       * the GS in DUAL_OBJECT mode.
 +       *
 +       * (2) We can't allocate fewer entries than gs_granularity.
 +       */
 +      gs_chunks = ALIGN(MAX2(gs_granularity, 2) * gs_entry_size_bytes,
 +                        chunk_size_bytes) / chunk_size_bytes;
 +      gs_wants =
 +         ALIGN(devinfo->urb.max_gs_entries * gs_entry_size_bytes,
 +               chunk_size_bytes) / chunk_size_bytes - gs_chunks;
 +   }
 +
 +   /* There should always be enough URB space to satisfy the minimum
 +    * requirements of each stage.
 +    */
 +   unsigned total_needs = push_constant_chunks + vs_chunks + gs_chunks;
 +   assert(total_needs <= urb_chunks);
 +
 +   /* Mete out remaining space (if any) in proportion to "wants". */
 +   unsigned total_wants = vs_wants + gs_wants;
 +   unsigned remaining_space = urb_chunks - total_needs;
 +   if (remaining_space > total_wants)
 +      remaining_space = total_wants;
 +   if (remaining_space > 0) {
 +      unsigned vs_additional = (unsigned)
 +         round(vs_wants * (((double) remaining_space) / total_wants));
 +      vs_chunks += vs_additional;
 +      remaining_space -= vs_additional;
 +      gs_chunks += remaining_space;
 +   }
 +
 +   /* Sanity check that we haven't over-allocated. */
 +   assert(push_constant_chunks + vs_chunks + gs_chunks <= urb_chunks);
 +
 +   /* Finally, compute the number of entries that can fit in the space
 +    * allocated to each stage.
 +    */
 +   unsigned nr_vs_entries = vs_chunks * chunk_size_bytes / vs_entry_size_bytes;
 +   unsigned nr_gs_entries = gs_chunks * chunk_size_bytes / gs_entry_size_bytes;
 +
 +   /* Since we rounded up when computing *_wants, this may be slightly more
 +    * than the maximum allowed amount, so correct for that.
 +    */
 +   nr_vs_entries = MIN2(nr_vs_entries, devinfo->urb.max_vs_entries);
 +   nr_gs_entries = MIN2(nr_gs_entries, devinfo->urb.max_gs_entries);
 +
 +   /* Ensure that we program a multiple of the granularity. */
 +   nr_vs_entries = ROUND_DOWN_TO(nr_vs_entries, vs_granularity);
 +   nr_gs_entries = ROUND_DOWN_TO(nr_gs_entries, gs_granularity);
 +
 +   /* Finally, sanity check to make sure we have at least the minimum number
 +    * of entries needed for each stage.
 +    */
 +   assert(nr_vs_entries >= devinfo->urb.min_vs_entries);
 +   if (gs_present)
 +      assert(nr_gs_entries >= 2);
 +
 +   /* Lay out the URB in the following order:
 +    * - push constants
 +    * - VS
 +    * - GS
 +    */
 +   pipeline->urb.vs_start = push_constant_chunks;
 +   pipeline->urb.vs_size = vs_size;
 +   pipeline->urb.nr_vs_entries = nr_vs_entries;
 +
 +   pipeline->urb.gs_start = push_constant_chunks + vs_chunks;
 +   pipeline->urb.gs_size = gs_size;
 +   pipeline->urb.nr_gs_entries = nr_gs_entries;
 +}
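
A worked example of the partitioning above, using made-up numbers rather than any particular gen's limits: 128 KB of URB, 8 KB chunks, 32 KB reserved for push constants, VS entry size 2 (128 bytes), 64 to 640 VS entries, and no GS.

#include <stdio.h>

int
main(void)
{
   unsigned chunk = 8192;
   unsigned urb_chunks     = 128 * 1024 / chunk;                      /* 16 */
   unsigned push_chunks    =  32 * 1024 / chunk;                      /*  4 */
   unsigned vs_entry_bytes = 2 * 64;                                  /* 128 */
   unsigned vs_chunks = (64 * vs_entry_bytes + chunk - 1) / chunk;    /*  1 */
   unsigned vs_wants  = (640 * vs_entry_bytes + chunk - 1) / chunk
                        - vs_chunks;                                  /*  9 */

   /* With no GS, any leftover space (capped at vs_wants) goes to the VS. */
   unsigned remaining = urb_chunks - push_chunks - vs_chunks;         /* 11 */
   if (remaining > vs_wants)
      remaining = vs_wants;
   vs_chunks += remaining;                                            /* 10 */

   printf("nr_vs_entries = %u\n", vs_chunks * chunk / vs_entry_bytes); /* 640 */
   return 0;
}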
 +
 +static const struct {
 +   uint32_t token;
 +   gl_shader_stage stage;
 +   const char *name;
 +} stage_info[] = {
 +   { GL_VERTEX_SHADER, MESA_SHADER_VERTEX, "vertex" },
 +   { GL_TESS_CONTROL_SHADER, (gl_shader_stage)-1, "tess control" },
 +   { GL_TESS_EVALUATION_SHADER, (gl_shader_stage)-1, "tess evaluation" },
 +   { GL_GEOMETRY_SHADER, MESA_SHADER_GEOMETRY, "geometry" },
 +   { GL_FRAGMENT_SHADER, MESA_SHADER_FRAGMENT, "fragment" },
 +   { GL_COMPUTE_SHADER, MESA_SHADER_COMPUTE, "compute" },
 +};
 +
 +struct spirv_header {
 +   uint32_t magic;
 +   uint32_t version;
 +   uint32_t gen_magic;
 +};
 +
 +static void
 +setup_nir_io(struct gl_shader *mesa_shader,
 +             nir_shader *shader)
 +{
 +   struct gl_program *prog = mesa_shader->Program;
 +   foreach_list_typed(nir_variable, var, node, &shader->inputs) {
 +      prog->InputsRead |= BITFIELD64_BIT(var->data.location);
 +      if (shader->stage == MESA_SHADER_FRAGMENT) {
 +         struct gl_fragment_program *fprog = (struct gl_fragment_program *)prog;
 +
 +         fprog->InterpQualifier[var->data.location] =
 +            (glsl_interp_qualifier)var->data.interpolation;
 +         if (var->data.centroid)
 +            fprog->IsCentroid |= BITFIELD64_BIT(var->data.location);
 +         if (var->data.sample)
 +            fprog->IsSample |= BITFIELD64_BIT(var->data.location);
 +      }
 +   }
 +
 +   foreach_list_typed(nir_variable, var, node, &shader->outputs) {
 +      prog->OutputsWritten |= BITFIELD64_BIT(var->data.location);
 +   }
 +
 +   mesa_shader->num_uniform_components = shader->num_uniforms;
 +}
 +
 +static void
 +anv_compile_shader_spirv(struct anv_compiler *compiler,
 +                         struct gl_shader_program *program,
 +                         struct anv_pipeline *pipeline, uint32_t stage)
 +{
 +   struct brw_context *brw = compiler->brw;
 +   struct anv_shader *shader = pipeline->shaders[stage];
 +   struct gl_shader *mesa_shader;
 +   int name = 0;
 +   uint32_t *spirv;
 +
 +   mesa_shader = brw_new_shader(&brw->ctx, name, stage_info[stage].token);
 +   fail_if(mesa_shader == NULL,
 +           "failed to create %s shader\n", stage_info[stage].name);
 +
 +#define CREATE_PROGRAM(stage) \
 +   _mesa_init_##stage##_program(&brw->ctx, &ralloc(mesa_shader, struct brw_##stage##_program)->program, 0, 0)
 +
 +   bool is_scalar;
 +   struct gl_program *prog;
 +   switch (stage) {
 +   case VK_SHADER_STAGE_VERTEX:
 +      prog = CREATE_PROGRAM(vertex);
 +      is_scalar = compiler->screen->compiler->scalar_vs;
 +      break;
 +   case VK_SHADER_STAGE_GEOMETRY:
 +      prog = CREATE_PROGRAM(geometry);
 +      is_scalar = false;
 +      break;
 +   case VK_SHADER_STAGE_FRAGMENT:
 +      prog = CREATE_PROGRAM(fragment);
 +      is_scalar = true;
 +      break;
 +   case VK_SHADER_STAGE_COMPUTE:
 +      prog = CREATE_PROGRAM(compute);
 +      is_scalar = true;
 +      break;
 +   default:
 +      unreachable("Unsupported shader stage");
 +   }
 +   _mesa_reference_program(&brw->ctx, &mesa_shader->Program, prog);
 +
 +   mesa_shader->Program->Parameters =
 +      rzalloc(mesa_shader, struct gl_program_parameter_list);
 +
 +   mesa_shader->Type = stage_info[stage].token;
 +   mesa_shader->Stage = stage_info[stage].stage;
 +
 +   struct gl_shader_compiler_options *glsl_options =
 +      &compiler->screen->compiler->glsl_compiler_options[stage_info[stage].stage];
 +
 +   spirv = (uint32_t *) shader->module->data;
 +   assert(spirv[0] == SPIR_V_MAGIC_NUMBER);
 +   assert(shader->module->size % 4 == 0);
 +
 +   mesa_shader->Program->nir =
 +      spirv_to_nir(spirv, shader->module->size / 4,
 +                   stage_info[stage].stage, glsl_options->NirOptions);
 +   fail_if(mesa_shader->Program->nir == NULL,
 +           "failed to translate SPIR-V to NIR\n");
 +
 +   nir_validate_shader(mesa_shader->Program->nir);
 +
 +   brw_process_nir(mesa_shader->Program->nir,
 +                   compiler->screen->devinfo,
 +                   NULL, mesa_shader->Stage, is_scalar);
 +
 +   setup_nir_io(mesa_shader, mesa_shader->Program->nir);
 +
 +   _mesa_reference_shader(&brw->ctx, &program->Shaders[program->NumShaders],
 +                          mesa_shader);
 +   program->NumShaders++;
 +}
 +
 +static void
 +add_compiled_stage(struct anv_pipeline *pipeline, uint32_t stage,
 +                   struct brw_stage_prog_data *prog_data)
 +{
 +   struct brw_device_info *devinfo = &pipeline->device->info;
 +   uint32_t max_threads[] = {
 +      [VK_SHADER_STAGE_VERTEX]                  = devinfo->max_vs_threads,
 +      [VK_SHADER_STAGE_TESS_CONTROL]            = 0,
 +      [VK_SHADER_STAGE_TESS_EVALUATION]         = 0,
 +      [VK_SHADER_STAGE_GEOMETRY]                = devinfo->max_gs_threads,
 +      [VK_SHADER_STAGE_FRAGMENT]                = devinfo->max_wm_threads,
 +      [VK_SHADER_STAGE_COMPUTE]                 = devinfo->max_cs_threads,
 +   };
 +
 +   pipeline->prog_data[stage] = prog_data;
 +   pipeline->active_stages |= 1 << stage;
 +   pipeline->scratch_start[stage] = pipeline->total_scratch;
 +   pipeline->total_scratch =
 +      align_u32(pipeline->total_scratch, 1024) +
 +      prog_data->total_scratch * max_threads[stage];
 +}
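
Put plainly, each stage records the current running total as its scratch start, and the pool then grows by the stage's per-thread scratch times its maximum thread count, with 1 KB alignment between slices. A small self-contained illustration with made-up sizes and thread counts; align_1k() stands in for align_u32():

#include <stdio.h>

static unsigned
align_1k(unsigned v)
{
   return (v + 1023) & ~1023u;
}

int
main(void)
{
   unsigned total = 0;

   /* e.g. VS: 2 KB of scratch per thread, 280 threads */
   unsigned vs_start = total;
   total = align_1k(total) + 2048 * 280;

   /* e.g. FS: 1 KB of scratch per thread, 408 threads */
   unsigned fs_start = total;
   total = align_1k(total) + 1024 * 408;

   printf("vs at %u, fs at %u, %u bytes total\n", vs_start, fs_start, total);
   return 0;
}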
 +
 +int
 +anv_compiler_run(struct anv_compiler *compiler, struct anv_pipeline *pipeline)
 +{
 +   struct gl_shader_program *program;
 +   int name = 0;
 +   struct brw_context *brw = compiler->brw;
 +
 +   pipeline->writes_point_size = false;
 +
 +   /* When we free the pipeline, we detect stages based on the NULL status
 +    * of various prog_data pointers.  Make them NULL by default.
 +    */
 +   memset(pipeline->prog_data, 0, sizeof(pipeline->prog_data));
 +   memset(pipeline->scratch_start, 0, sizeof(pipeline->scratch_start));
 +
 +   brw->use_rep_send = pipeline->use_repclear;
 +   brw->no_simd8 = pipeline->use_repclear;
 +
 +   program = brw->ctx.Driver.NewShaderProgram(name);
 +   program->Shaders = (struct gl_shader **)
 +      calloc(VK_SHADER_STAGE_NUM, sizeof(struct gl_shader *));
 +   fail_if(program == NULL || program->Shaders == NULL,
 +           "failed to create program\n");
 +
 +   for (unsigned i = 0; i < VK_SHADER_STAGE_NUM; i++) {
 +      if (pipeline->shaders[i])
 +         anv_compile_shader_spirv(compiler, program, pipeline, i);
 +   }
 +
 +   for (unsigned i = 0; i < program->NumShaders; i++) {
 +      struct gl_shader *shader = program->Shaders[i];
 +      program->_LinkedShaders[shader->Stage] = shader;
 +   }
 +
 +   bool success;
 +   pipeline->active_stages = 0;
 +   pipeline->total_scratch = 0;
 +
 +   if (pipeline->shaders[VK_SHADER_STAGE_VERTEX]) {
 +      struct brw_vs_prog_key vs_key;
 +      struct gl_vertex_program *vp = (struct gl_vertex_program *)
 +         program->_LinkedShaders[MESA_SHADER_VERTEX]->Program;
 +      struct brw_vertex_program *bvp = brw_vertex_program(vp);
 +
 +      brw_vs_populate_key(brw, bvp, &vs_key);
 +
 +      success = really_do_vs_prog(brw, program, bvp, &vs_key, pipeline);
 +      fail_if(!success, "do_wm_prog failed\n");
 +      add_compiled_stage(pipeline, VK_SHADER_STAGE_VERTEX,
 +                         &pipeline->vs_prog_data.base.base);
 +
 +      if (vp->Base.OutputsWritten & VARYING_SLOT_PSIZ)
 +         pipeline->writes_point_size = true;
 +   } else {
 +      memset(&pipeline->vs_prog_data, 0, sizeof(pipeline->vs_prog_data));
 +      pipeline->vs_simd8 = NO_KERNEL;
 +      pipeline->vs_vec4 = NO_KERNEL;
 +   }
 +
 +   if (pipeline->shaders[VK_SHADER_STAGE_GEOMETRY]) {
 +      struct brw_gs_prog_key gs_key;
 +      struct gl_geometry_program *gp = (struct gl_geometry_program *)
 +         program->_LinkedShaders[MESA_SHADER_GEOMETRY]->Program;
 +      struct brw_geometry_program *bgp = brw_geometry_program(gp);
 +
 +      brw_gs_populate_key(brw, pipeline, bgp, &gs_key);
 +
 +      success = really_do_gs_prog(brw, program, bgp, &gs_key, pipeline);
 +      fail_if(!success, "do_gs_prog failed\n");
 +      add_compiled_stage(pipeline, VK_SHADER_STAGE_GEOMETRY,
 +                         &pipeline->gs_prog_data.base.base);
 +
 +      if (gp->Base.OutputsWritten & VARYING_SLOT_PSIZ)
 +         pipeline->writes_point_size = true;
 +   } else {
 +      pipeline->gs_vec4 = NO_KERNEL;
 +   }
 +
 +   if (pipeline->shaders[VK_SHADER_STAGE_FRAGMENT]) {
 +      struct brw_wm_prog_key wm_key;
 +      struct gl_fragment_program *fp = (struct gl_fragment_program *)
 +         program->_LinkedShaders[MESA_SHADER_FRAGMENT]->Program;
 +      struct brw_fragment_program *bfp = brw_fragment_program(fp);
 +
 +      brw_wm_populate_key(brw, bfp, &wm_key);
 +
 +      success = really_do_wm_prog(brw, program, bfp, &wm_key, pipeline);
 +      fail_if(!success, "do_wm_prog failed\n");
 +      add_compiled_stage(pipeline, VK_SHADER_STAGE_FRAGMENT,
 +                         &pipeline->wm_prog_data.base);
 +   }
 +
 +   if (pipeline->shaders[VK_SHADER_STAGE_COMPUTE]) {
 +      struct brw_cs_prog_key cs_key;
 +      struct gl_compute_program *cp = (struct gl_compute_program *)
 +         program->_LinkedShaders[MESA_SHADER_COMPUTE]->Program;
 +      struct brw_compute_program *bcp = brw_compute_program(cp);
 +
 +      brw_cs_populate_key(brw, bcp, &cs_key);
 +
 +      success = brw_codegen_cs_prog(brw, program, bcp, &cs_key, pipeline);
 +      fail_if(!success, "brw_codegen_cs_prog failed\n");
 +      add_compiled_stage(pipeline, VK_SHADER_STAGE_COMPUTE,
 +                         &pipeline->cs_prog_data.base);
 +   }
 +
 +   brw->ctx.Driver.DeleteShaderProgram(&brw->ctx, program);
 +
 +   struct anv_device *device = compiler->device;
 +   while (device->scratch_block_pool.bo.size < pipeline->total_scratch)
 +      anv_block_pool_alloc(&device->scratch_block_pool);
 +
 +   gen7_compute_urb_partition(pipeline);
 +
 +   return 0;
 +}
 +
 +/* This badly named function frees the struct anv_pipeline data that the compiler
 + * allocates.  Currently just the prog_data structs.
 + */
 +void
 +anv_compiler_free(struct anv_pipeline *pipeline)
 +{
 +   for (uint32_t stage = 0; stage < VK_SHADER_STAGE_NUM; stage++) {
 +      if (pipeline->prog_data[stage]) {
 +         free(pipeline->prog_data[stage]->map_entries);
 +         /* We only ever set up the params array because we don't do
 +          * non-UBO pull constants
 +          */
 +         anv_device_free(pipeline->device, pipeline->prog_data[stage]->param);
 +      }
 +   }
 +}
 +
 +}
diff --combined src/vulkan/anv_nir_apply_dynamic_offsets.c
index 367c4f82d64e504f1df7a03a2000d0215b942fb3,0000000000000000000000000000000000000000..1f6c64a9e02507ef115c177f56f7af8d4167be19
mode 100644,000000..100644
--- /dev/null
@@@ -1,150 -1,0 +1,149 @@@
-                                nir_src_for_ssa(&new_load->dest.ssa),
-                                state->shader);
 +/*
 + * Copyright © 2015 Intel Corporation
 + *
 + * Permission is hereby granted, free of charge, to any person obtaining a
 + * copy of this software and associated documentation files (the "Software"),
 + * to deal in the Software without restriction, including without limitation
 + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 + * and/or sell copies of the Software, and to permit persons to whom the
 + * Software is furnished to do so, subject to the following conditions:
 + *
 + * The above copyright notice and this permission notice (including the next
 + * paragraph) shall be included in all copies or substantial portions of the
 + * Software.
 + *
 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 + * IN THE SOFTWARE.
 + */
 +
 +#include "anv_nir.h"
 +#include "glsl/nir/nir_builder.h"
 +
 +struct apply_dynamic_offsets_state {
 +   nir_shader *shader;
 +   nir_builder builder;
 +
 +   VkShaderStage stage;
 +   struct anv_pipeline_layout *layout;
 +
 +   uint32_t indices_start;
 +};
 +
 +static bool
 +apply_dynamic_offsets_block(nir_block *block, void *void_state)
 +{
 +   struct apply_dynamic_offsets_state *state = void_state;
 +   struct anv_descriptor_set_layout *set_layout;
 +   const struct anv_descriptor_slot *slot;
 +
 +   nir_foreach_instr_safe(block, instr) {
 +      if (instr->type != nir_instr_type_intrinsic)
 +         continue;
 +
 +      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
 +
 +      bool has_indirect = false;
 +      uint32_t set, binding;
 +      switch (intrin->intrinsic) {
 +      case nir_intrinsic_load_ubo_indirect:
 +         has_indirect = true;
 +         /* fallthrough */
 +      case nir_intrinsic_load_ubo: {
 +         set = intrin->const_index[0];
 +
 +         nir_const_value *const_binding = nir_src_as_const_value(intrin->src[0]);
 +         if (const_binding) {
 +            binding = const_binding->u[0];
 +         } else {
 +            assert(0 && "need more info from the ir for this.");
 +         }
 +         break;
 +      }
 +      default:
 +         continue; /* the loop */
 +      }
 +
 +      set_layout = state->layout->set[set].layout;
 +      slot = &set_layout->stage[state->stage].surface_start[binding];
 +      if (slot->dynamic_slot < 0)
 +         continue;
 +
 +      uint32_t dynamic_index = state->layout->set[set].dynamic_offset_start +
 +                               slot->dynamic_slot;
 +
 +      state->builder.cursor = nir_before_instr(&intrin->instr);
 +
 +      nir_intrinsic_instr *offset_load =
 +         nir_intrinsic_instr_create(state->shader, nir_intrinsic_load_uniform);
 +      offset_load->num_components = 1;
 +      offset_load->const_index[0] = state->indices_start + dynamic_index;
 +      offset_load->const_index[1] = 0;
 +      nir_ssa_dest_init(&offset_load->instr, &offset_load->dest, 1, NULL);
 +      nir_builder_instr_insert(&state->builder, &offset_load->instr);
 +
 +      nir_ssa_def *offset = &offset_load->dest.ssa;
 +      if (has_indirect) {
 +         assert(intrin->src[1].is_ssa);
 +         offset = nir_iadd(&state->builder, intrin->src[1].ssa, offset);
 +      }
 +
 +      assert(intrin->dest.is_ssa);
 +
 +      nir_intrinsic_instr *new_load =
 +         nir_intrinsic_instr_create(state->shader,
 +                                    nir_intrinsic_load_ubo_indirect);
 +      new_load->num_components = intrin->num_components;
 +      new_load->const_index[0] = intrin->const_index[0];
 +      new_load->const_index[1] = intrin->const_index[1];
 +      nir_src_copy(&new_load->src[0], &intrin->src[0], &new_load->instr);
 +      new_load->src[1] = nir_src_for_ssa(offset);
 +      nir_ssa_dest_init(&new_load->instr, &new_load->dest,
 +                        intrin->dest.ssa.num_components,
 +                        intrin->dest.ssa.name);
 +      nir_builder_instr_insert(&state->builder, &new_load->instr);
 +
 +      nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
++                               nir_src_for_ssa(&new_load->dest.ssa));
 +
 +      nir_instr_remove(&intrin->instr);
 +   }
 +
 +   return true;
 +}
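
The net effect of the rewrite above, stated in plain C: a UBO load through a dynamic descriptor ends up reading at its original (possibly indirect) offset plus a per-binding dynamic offset that the driver pushes as an extra uniform right after the shader's own uniforms. The function below is an illustration with invented names, not part of the pass:

#include <stdint.h>
#include <stdio.h>

static uint32_t
rewritten_ubo_offset(uint32_t original_offset,      /* const or indirect src  */
                     const uint32_t *push_uniforms, /* driver-pushed uniforms */
                     uint32_t indices_start,        /* first dynamic slot     */
                     uint32_t dynamic_index)        /* slot for this binding  */
{
   return original_offset + push_uniforms[indices_start + dynamic_index];
}

int
main(void)
{
   uint32_t push[8] = { 0, 0, 0, 0, 256, 512, 0, 0 };
   /* e.g. indices_start = 4, dynamic_index = 1, original offset 16 */
   printf("%u\n", rewritten_ubo_offset(16, push, 4, 1));   /* 528 */
   return 0;
}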
 +
 +void
 +anv_nir_apply_dynamic_offsets(struct anv_pipeline *pipeline,
 +                              nir_shader *shader,
 +                              struct brw_stage_prog_data *prog_data)
 +{
 +   struct apply_dynamic_offsets_state state = {
 +      .shader = shader,
 +      .stage = anv_vk_shader_stage_for_mesa_stage(shader->stage),
 +      .layout = pipeline->layout,
 +      .indices_start = shader->num_uniforms,
 +   };
 +
 +   if (!state.layout || !state.layout->stage[state.stage].has_dynamic_offsets)
 +      return;
 +
 +   nir_foreach_overload(shader, overload) {
 +      if (overload->impl) {
 +         nir_builder_init(&state.builder, overload->impl);
 +         nir_foreach_block(overload->impl, apply_dynamic_offsets_block, &state);
 +         nir_metadata_preserve(overload->impl, nir_metadata_block_index |
 +                                               nir_metadata_dominance);
 +      }
 +   }
 +
 +   struct anv_push_constants *null_data = NULL;
 +   for (unsigned i = 0; i < MAX_DYNAMIC_BUFFERS; i++)
 +      prog_data->param[i + shader->num_uniforms] =
 +         (const gl_constant_value *)&null_data->dynamic_offsets[i];
 +
 +   shader->num_uniforms += MAX_DYNAMIC_BUFFERS;
 +}
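
The NULL-pointer arithmetic in the final loop encodes "offset of dynamic_offsets[i] within the push constant struct" directly in the param pointer, to be resolved once a real push constant block exists. The same idiom in isolation, with an illustrative struct layout rather than the real anv_push_constants:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct push_block {                 /* stand-in layout, not the driver's */
   uint32_t other[4];
   uint32_t dynamic_offsets[8];
};

int
main(void)
{
   struct push_block *null_data = NULL;

   /* Same trick as above: taking the address through a NULL base yields
    * the member's offset, i.e. offsetof(struct push_block,
    * dynamic_offsets[2]). */
   uintptr_t encoded = (uintptr_t)&null_data->dynamic_offsets[2];

   printf("%zu == %zu\n", (size_t)encoded,
          offsetof(struct push_block, dynamic_offsets[2]));
   return 0;
}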