Merge remote-tracking branch 'public/master' into vulkan
authorJason Ekstrand <jason.ekstrand@intel.com>
Fri, 15 Apr 2016 00:14:28 +0000 (17:14 -0700)
committerJason Ekstrand <jason.ekstrand@intel.com>
Fri, 15 Apr 2016 00:14:28 +0000 (17:14 -0700)
40 files changed:
SConstruct
appveyor.yml
configure.ac
doxygen/Makefile.orig [deleted file]
doxygen/header.html.orig [deleted file]
install-gallium-links.mk
scons/gallium.py
src/compiler/SConscript
src/compiler/SConscript.nir [new file with mode: 0644]
src/compiler/nir/nir.h
src/compiler/nir/nir_intrinsics.h
src/compiler/nir/nir_lower_io.c
src/compiler/nir/nir_print.c
src/compiler/nir/nir_to_ssa.c
src/gallium/Makefile.am
src/gallium/drivers/llvmpipe/SConscript
src/gallium/drivers/nouveau/nv_object.xml.h
src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
src/gallium/drivers/nouveau/nvc0/nve4_compute.c
src/gallium/drivers/radeon/r600_buffer_common.c
src/gallium/drivers/radeonsi/si_descriptors.c
src/gallium/drivers/radeonsi/si_shader.c
src/gallium/drivers/radeonsi/si_state.c
src/gallium/drivers/radeonsi/si_state_shaders.c
src/gallium/drivers/swr/Makefile.am
src/gallium/drivers/swr/Makefile.sources
src/gallium/drivers/swr/Makefile.sources-arch [deleted file]
src/gallium/drivers/swr/avx/Makefile.am [deleted file]
src/gallium/drivers/swr/avx2/Makefile.am [deleted file]
src/gallium/tests/unit/SConscript
src/gallium/tests/unit/pipe_barrier_test.c
src/gallium/tests/unit/translate_test.c
src/mesa/drivers/dri/i965/brw_fs.cpp
src/mesa/drivers/dri/i965/brw_fs_generator.cpp
src/mesa/drivers/dri/i965/brw_fs_nir.cpp
src/mesa/drivers/dri/i965/brw_vec4.h
src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
src/mesa/drivers/dri/i965/intel_screen.c
src/mesa/state_tracker/st_cb_bitmap.c
src/util/SConscript

index ef71ab69c3844cd0a3837f76f34d639ea460cd79..e2e49fcc6ca4aa93f75440517ed89f99cca6dab9 100644 (file)
@@ -84,6 +84,11 @@ env.Append(CPPPATH = [
 #print env.Dump()
 
 
+# Add a check target for running tests
+check = env.Alias('check')
+env.AlwaysBuild(check)
+
+
 #######################################################################
 # Invoke host SConscripts 
 # 
index bf7ac75285750261df7ce3207f16819fc3344ad4..2e9b9d650d784dcfcaeb0d515f549bd1b45b32a2 100644 (file)
@@ -65,6 +65,9 @@ install:
 build_script:
 - scons -j%NUMBER_OF_PROCESSORS% MSVC_VERSION=12.0 llvm=1
 
+after_build:
+- scons -j%NUMBER_OF_PROCESSORS% MSVC_VERSION=12.0 llvm=1 check
+
 
 # It's possible to setup notification here, as described in
 # http://www.appveyor.com/docs/notifications#appveyor-yml-configuration , but
index 0d3f0ff3c9d80f113387f51261afcf9b9189d862..2aa46dccdbb6758b675b30ad148c2f3b6df46fc0 100644 (file)
@@ -2551,8 +2551,6 @@ AC_CONFIG_FILES([Makefile
                src/gallium/drivers/softpipe/Makefile
                src/gallium/drivers/svga/Makefile
                src/gallium/drivers/swr/Makefile
-               src/gallium/drivers/swr/avx/Makefile
-               src/gallium/drivers/swr/avx2/Makefile
                src/gallium/drivers/trace/Makefile
                src/gallium/drivers/vc4/Makefile
                src/gallium/drivers/virgl/Makefile
diff --git a/doxygen/Makefile.orig b/doxygen/Makefile.orig
deleted file mode 100644 (file)
index b1bc15d..0000000
+++ /dev/null
@@ -1,35 +0,0 @@
-
-default: full
-
-all: full subset
-
-%.tag: %.doxy
-       doxygen $<
-
-FULL = \
-       main.doxy \
-       math.doxy \
-       vbo.doxy \
-       glapi.doxy \
-       glsl.doxy \
-       swrast.doxy \
-       swrast_setup.doxy \
-       tnl.doxy \
-       tnl_dd.doxy \
-       gbm.doxy \
-       i965.doxy
-
-full: $(FULL:.doxy=.tag)
-       $(foreach FILE,$(FULL),doxygen $(FILE);)
-
-SUBSET = \
-       main.doxy \
-       math.doxy
-
-subset: $(SUBSET:.doxy=.tag)
-       $(foreach FILE,$(SUBSET),doxygen $(FILE);)
-
-clean:
-       -rm -rf $(FULL:.doxy=) $(SUBSET:.doxy=)
-       -rm -rf *.tag
-       -rm -rf *.db
diff --git a/doxygen/header.html.orig b/doxygen/header.html.orig
deleted file mode 100644 (file)
index 034231c..0000000
+++ /dev/null
@@ -1,19 +0,0 @@
-<html>
-<head>
-<title>Mesa Source Code Documentation</title>
-<link href="doxygen.css" rel="stylesheet" type="text/css">
-</head>
-<body>
-<div class="qindex">
-<a class="qindex" href="../main/index.html">core</a> |
-<a class="qindex" href="../glapi/index.html">glapi</a> |
-<a class="qindex" href="../glsl/index.html">glsl</a> |
-<a class="qindex" href="../vbo/index.html">vbo</a> |
-<a class="qindex" href="../math/index.html">math</a> |
-<a class="qindex" href="../swrast/index.html">swrast</a> |
-<a class="qindex" href="../swrast_setup/index.html">swrast_setup</a> |
-<a class="qindex" href="../tnl/index.html">tnl</a> |
-<a class="qindex" href="../tnl_dd/index.html">tnl_dd</a> |
-<a class="qindex" href="../gbm/index.html">gbm</a> |
-<a class="qindex" href="../i965/index.html">i965</a>
-</div>
index 4010cad15c0987af3fd75f1d4595f125e285e4ae..ac5a499c48fbdbe590e5e6cd863cdbb65d31623d 100644 (file)
@@ -12,9 +12,9 @@ all-local : .install-gallium-links
                link_dir=$(top_builddir)/$(LIB_DIR)/egl;        \
        fi;                                                     \
        $(MKDIR_P) $$link_dir;                                  \
-       file_list=$(dri_LTLIBRARIES:%.la=.libs/%.so);           \
-       file_list+=$(egl_LTLIBRARIES:%.la=.libs/%.$(LIB_EXT)*); \
-       file_list+=$(lib_LTLIBRARIES:%.la=.libs/%.$(LIB_EXT)*); \
+       file_list="$(dri_LTLIBRARIES:%.la=.libs/%.so)";         \
+       file_list+="$(egl_LTLIBRARIES:%.la=.libs/%.$(LIB_EXT)*)";       \
+       file_list+="$(lib_LTLIBRARIES:%.la=.libs/%.$(LIB_EXT)*)";       \
        for f in $$file_list; do                                \
                if test -h .libs/$$f; then                      \
                        cp -d $$f $$link_dir;                   \
index f37042d9af10d53888256f45660c717dd6e49d8b..94321b2e8473e988b8b2675d92975949b2ee5146 100755 (executable)
@@ -82,11 +82,6 @@ def install_shared_library(env, sources, version = ()):
     return targets
 
 
-def createInstallMethods(env):
-    env.AddMethod(install_program, 'InstallProgram')
-    env.AddMethod(install_shared_library, 'InstallSharedLibrary')
-
-
 def msvc2013_compat(env):
     if env['gcc']:
         env.Append(CCFLAGS = [
@@ -94,8 +89,20 @@ def msvc2013_compat(env):
             '-Werror=pointer-arith',
         ])
 
-def createMSVCCompatMethods(env):
-    env.AddMethod(msvc2013_compat, 'MSVC2013Compat')
+
+def unit_test(env, test_name, program_target, args=None):
+    env.InstallProgram(program_target)
+
+    cmd = [program_target[0].abspath]  # command starts with the absolute path of the first target node
+    if args is not None:
+        cmd += args
+    cmd = ' '.join(cmd)  # NOTE(review): plain space join, nothing is quoted -- confirm paths/args never contain spaces
+
+    # Run the program as the action of an alias named after the test; see http://www.scons.org/wiki/UnitTests
+    action = SCons.Action.Action(cmd, "  Running %s ..." % test_name)
+    alias = env.Alias(test_name, program_target, action)
+    env.AlwaysBuild(alias)
+    env.Depends('check', alias)  # make the 'check' target depend on this test's alias
 
 
 def num_jobs():
@@ -667,8 +674,10 @@ def generate(env):
     
     # Custom builders and methods
     env.Tool('custom')
-    createInstallMethods(env)
-    createMSVCCompatMethods(env)
+    env.AddMethod(install_program, 'InstallProgram')
+    env.AddMethod(install_shared_library, 'InstallSharedLibrary')
+    env.AddMethod(msvc2013_compat, 'MSVC2013Compat')
+    env.AddMethod(unit_test, 'UnitTest')
 
     env.PkgCheckModules('X11', ['x11', 'xext', 'xdamage', 'xfixes', 'glproto >= 1.4.13'])
     env.PkgCheckModules('XCB', ['x11-xcb', 'xcb-glx >= 1.8.1', 'xcb-dri2 >= 1.8'])
index 8969d8219846c7df720e19777eae08506c37c1d0..10c79c4434893186973e50d44314c63bbed73bb7 100644 (file)
@@ -22,3 +22,4 @@ compiler = env.ConvenienceLibrary(
 Export('compiler')
 
 SConscript('SConscript.glsl')
+SConscript('SConscript.nir')
diff --git a/src/compiler/SConscript.nir b/src/compiler/SConscript.nir
new file mode 100644 (file)
index 0000000..51cc0d4
--- /dev/null
@@ -0,0 +1,73 @@
+import common
+
+Import('*')
+
+from sys import executable as python_cmd
+
+env = env.Clone()
+
+env.MSVC2013Compat()
+
+env.Prepend(CPPPATH = [
+    '#include',
+    '#src',
+    '#src/mapi',
+    '#src/mesa',
+    '#src/gallium/include',
+    '#src/gallium/auxiliary',
+    '#src/compiler/nir',
+])
+
+# Make generated headers reachable from the include path.
+env.Prepend(CPPPATH = [Dir('.').abspath, Dir('nir').abspath])
+
+# nir generated sources
+
+nir_builder_opcodes_h = env.CodeGenerate(
+    target = 'nir/nir_builder_opcodes.h',
+    script = 'nir/nir_builder_opcodes_h.py',
+    source = [],
+    command = python_cmd + ' $SCRIPT > $TARGET'
+)
+
+env.CodeGenerate(
+    target = 'nir/nir_constant_expressions.c',
+    script = 'nir/nir_constant_expressions.py',
+    source = [],
+    command = python_cmd + ' $SCRIPT > $TARGET'
+)
+
+env.CodeGenerate(
+    target = 'nir/nir_opcodes.h',
+    script = 'nir/nir_opcodes_h.py',
+    source = [],
+    command = python_cmd + ' $SCRIPT > $TARGET'
+)
+
+env.CodeGenerate(
+    target = 'nir/nir_opcodes.c',
+    script = 'nir/nir_opcodes_c.py',
+    source = [],
+    command = python_cmd + ' $SCRIPT > $TARGET'
+)
+
+env.CodeGenerate(
+    target = 'nir/nir_opt_algebraic.c',
+    script = 'nir/nir_opt_algebraic.py',
+    source = [],
+    command = python_cmd + ' $SCRIPT > $TARGET'
+)
+
+# parse Makefile.sources
+source_lists = env.ParseSourceList('Makefile.sources')
+
+nir_sources = source_lists['NIR_FILES']
+nir_sources += source_lists['NIR_GENERATED_FILES']
+
+nir = env.ConvenienceLibrary(
+    target = 'nir',
+    source = nir_sources,
+)
+
+env.Alias('nir', nir)
+Export('nir')
index fede1954cf0360e971978dd699a334f445275e12..bbbc2089db3f21960f49440fa014be07475afd53 100644 (file)
@@ -507,7 +507,14 @@ typedef struct nir_src {
    bool is_ssa;
 } nir_src;
 
-#define NIR_SRC_INIT (nir_src) { { NULL } }
+static inline nir_src
+nir_src_init(void)
+{
+   nir_src src = { { NULL } }; /* members without an explicit initializer are implicitly zeroed */
+   return src; /* returned by value; NIR_SRC_INIT expands to a call of this function */
+}
+
+#define NIR_SRC_INIT nir_src_init()
 
 #define nir_foreach_use(reg_or_ssa_def, src) \
    list_for_each_entry(nir_src, src, &(reg_or_ssa_def)->uses, use_link)
@@ -530,7 +537,14 @@ typedef struct {
    bool is_ssa;
 } nir_dest;
 
-#define NIR_DEST_INIT (nir_dest) { { { NULL } } }
+static inline nir_dest
+nir_dest_init(void)
+{
+   nir_dest dest = { { { NULL } } }; /* members without an explicit initializer are implicitly zeroed */
+   return dest; /* returned by value; NIR_DEST_INIT expands to a call of this function */
+}
+
+#define NIR_DEST_INIT nir_dest_init()
 
 #define nir_foreach_def(reg, dest) \
    list_for_each_entry(nir_dest, dest, &(reg)->defs, reg.def_link)
@@ -957,7 +971,7 @@ typedef enum {
    NIR_INTRINSIC_UCP_ID = 4,
 
    /**
-    * The ammount of data, starting from BASE, that this instruction may
+    * The amount of data, starting from BASE, that this instruction may
     * access.  This is used to provide bounds if the offset is not constant.
     */
    NIR_INTRINSIC_RANGE = 5,
index fa162f9d126927f475f7abbeeb5dcfd06d39d00a..3cb4f95394c6d683ee03862402aaf03a82e84b62 100644 (file)
@@ -42,9 +42,9 @@
 #define ARR(...) { __VA_ARGS__ }
 
 
-INTRINSIC(load_var, 0, ARR(), true, 0, 1, 0, xx, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE)
+INTRINSIC(load_var, 0, ARR(0), true, 0, 1, 0, xx, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE)
 INTRINSIC(store_var, 1, ARR(0), false, 0, 1, 1, WRMASK, xx, xx, 0)
-INTRINSIC(copy_var, 0, ARR(), false, 0, 2, 0, xx, xx, xx, 0)
+INTRINSIC(copy_var, 0, ARR(0), false, 0, 2, 0, xx, xx, xx, 0)
 
 /*
  * Interpolation of input.  The interp_var_at* intrinsics are similar to the
@@ -72,7 +72,7 @@ INTRINSIC(get_buffer_size, 1, ARR(1), true, 1, 0, 0, xx, xx, xx,
  * a barrier is an intrinsic with no inputs/outputs but which can't be moved
  * around/optimized in general
  */
-#define BARRIER(name) INTRINSIC(name, 0, ARR(), false, 0, 0, 0, xx, xx, xx, 0)
+#define BARRIER(name) INTRINSIC(name, 0, ARR(0), false, 0, 0, 0, xx, xx, xx, 0)
 
 BARRIER(barrier)
 BARRIER(discard)
@@ -89,7 +89,7 @@ BARRIER(memory_barrier)
  * The latter can be used as code motion barrier, which is currently not
  * feasible with NIR.
  */
-INTRINSIC(shader_clock, 0, ARR(), true, 1, 0, 0, xx, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE)
+INTRINSIC(shader_clock, 0, ARR(0), true, 1, 0, 0, xx, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE)
 
 /*
  * Memory barrier with semantics analogous to the compute shader
@@ -113,8 +113,8 @@ INTRINSIC(discard_if, 1, ARR(1), false, 0, 0, 0, xx, xx, xx, 0)
  *
  * end_primitive implements GLSL's EndPrimitive() built-in.
  */
-INTRINSIC(emit_vertex,   0, ARR(), false, 0, 0, 1, STREAM_ID, xx, xx, 0)
-INTRINSIC(end_primitive, 0, ARR(), false, 0, 0, 1, STREAM_ID, xx, xx, 0)
+INTRINSIC(emit_vertex,   0, ARR(0), false, 0, 0, 1, STREAM_ID, xx, xx, 0)
+INTRINSIC(end_primitive, 0, ARR(0), false, 0, 0, 1, STREAM_ID, xx, xx, 0)
 
 /**
  * Geometry Shader intrinsics with a vertex count.
@@ -137,7 +137,7 @@ INTRINSIC(set_vertex_count, 1, ARR(1), false, 0, 0, 0, xx, xx, xx, 0)
  */
 
 #define ATOMIC(name, flags) \
-   INTRINSIC(atomic_counter_##name##_var, 0, ARR(), true, 1, 1, 0, xx, xx, xx, flags) \
+   INTRINSIC(atomic_counter_##name##_var, 0, ARR(0), true, 1, 1, 0, xx, xx, xx, flags) \
    INTRINSIC(atomic_counter_##name, 1, ARR(1), true, 1, 0, 1, BASE, xx, xx, flags)
 
 ATOMIC(inc, 0)
@@ -170,9 +170,9 @@ INTRINSIC(image_atomic_or, 3, ARR(4, 1, 1), true, 1, 1, 0, xx, xx, xx, 0)
 INTRINSIC(image_atomic_xor, 3, ARR(4, 1, 1), true, 1, 1, 0, xx, xx, xx, 0)
 INTRINSIC(image_atomic_exchange, 3, ARR(4, 1, 1), true, 1, 1, 0, xx, xx, xx, 0)
 INTRINSIC(image_atomic_comp_swap, 4, ARR(4, 1, 1, 1), true, 1, 1, 0, xx, xx, xx, 0)
-INTRINSIC(image_size, 0, ARR(), true, 4, 1, 0, xx, xx, xx,
+INTRINSIC(image_size, 0, ARR(0), true, 4, 1, 0, xx, xx, xx,
           NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
-INTRINSIC(image_samples, 0, ARR(), true, 1, 1, 0, xx, xx, xx,
+INTRINSIC(image_samples, 0, ARR(0), true, 1, 1, 0, xx, xx, xx,
           NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
 
 /*
@@ -278,7 +278,7 @@ INTRINSIC(shared_atomic_exchange, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0)
 INTRINSIC(shared_atomic_comp_swap, 3, ARR(1, 1, 1), true, 1, 0, 0, xx, xx, xx, 0)
 
 #define SYSTEM_VALUE(name, components, num_indices, idx0, idx1, idx2) \
-   INTRINSIC(load_##name, 0, ARR(), true, components, 0, num_indices, \
+   INTRINSIC(load_##name, 0, ARR(0), true, components, 0, num_indices, \
    idx0, idx1, idx2, \
    NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
 
@@ -313,8 +313,9 @@ SYSTEM_VALUE(helper_invocation, 1, 0, xx, xx, xx)
  * of the start of the variable being loaded and and the offset source is a
  * offset into that variable.
  *
- * Uniform load operations have a second index that specifies the size of the
- * variable being loaded.  If const_index[1] == 0, then the size is unknown.
+ * Uniform load operations have a second "range" index that specifies the
+ * range (starting at base) of the data from which we are loading.  If
+ * const_index[1] == 0, then the range is unknown.
  *
  * Some load operations such as UBO/SSBO load and per_vertex loads take an
  * additional source to specify which UBO/SSBO/vertex to load from.
@@ -328,9 +329,8 @@ SYSTEM_VALUE(helper_invocation, 1, 0, xx, xx, xx)
 #define LOAD(name, srcs, num_indices, idx0, idx1, idx2, flags) \
    INTRINSIC(load_##name, srcs, ARR(1, 1, 1, 1), true, 0, 0, num_indices, idx0, idx1, idx2, flags)
 
-/* src[] = { offset }. const_index[] = { base, range } */
-LOAD(uniform, 1, 2, BASE, RANGE, xx,
-     NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+/* src[] = { offset }. const_index[] = { base, range } */
+LOAD(uniform, 1, 2, BASE, RANGE, xx, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
 /* src[] = { buffer_index, offset }. No const_index */
 LOAD(ubo, 2, 0, xx, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
 /* src[] = { offset }. const_index[] = { base } */
index 369a8ee537ed7d6a24ec16eb168ea5aeaca94840..df1f7a5d76535346d5ef2ff9655c4b7599ea5295 100644 (file)
@@ -278,8 +278,8 @@ nir_lower_io_block(nir_block *block, void *void_state)
             intrin->variables[0]->var->data.driver_location);
 
          if (load->intrinsic == nir_intrinsic_load_uniform) {
-            load->const_index[1] =
-               state->type_size(intrin->variables[0]->var->type);
+            nir_intrinsic_set_range(load,
+               state->type_size(intrin->variables[0]->var->type));
          }
 
          if (per_vertex)
index 2793020953ecf64b61153e8f25c195d40c8c944f..bbb4edf326056e4d7df641524e7acc96f804a16e 100644 (file)
 #include <stdlib.h>
 #include <inttypes.h> /* for PRIx64 macro */
 
+#if defined(_WIN32) && !defined(snprintf)
+#define snprintf _snprintf
+#endif
+
 static void
 print_tabs(unsigned num_tabs, FILE *fp)
 {
@@ -514,8 +518,6 @@ print_intrinsic_instr(nir_intrinsic_instr *instr, print_state *state)
       [NIR_INTRINSIC_STREAM_ID] = "stream-id",
       [NIR_INTRINSIC_UCP_ID] = "ucp-id",
       [NIR_INTRINSIC_RANGE] = "range",
-      [NIR_INTRINSIC_DESC_SET] = "desc-set",
-      [NIR_INTRINSIC_BINDING] = "binding",
    };
    for (unsigned idx = 1; idx < NIR_INTRINSIC_NUM_INDEX_FLAGS; idx++) {
       if (!info->index_map[idx])
index 23d709a218a5652ef504f4181fe9dc43e0874fe8..1a772fff2d5ad94b6ce06156f29cb290552339ef 100644 (file)
@@ -27,7 +27,6 @@
 
 #include "nir.h"
 #include <stdlib.h>
-#include <unistd.h>
 
 /*
  * Implements the classic to-SSA algorithm described by Cytron et. al. in
index 086e1701128b30d0149c95d8f4b2d24083123cf7..ef2bc1016d59545156da60b40b1848c5be5ae9a0 100644 (file)
@@ -80,8 +80,6 @@ endif
 
 if HAVE_GALLIUM_SWR
 SUBDIRS += drivers/swr
-SUBDIRS += drivers/swr/avx
-SUBDIRS += drivers/swr/avx2
 endif
 
 ## vc4/rpi
index 8188156afc2f736620faaf765a1f5dd0eb81a31a..fbbd22a52999bde27f7df429a4ed13d37dfce814 100644 (file)
@@ -38,10 +38,6 @@ if not env['embedded']:
             target = testname,
             source = [testname + '.c', 'lp_test_main.c'],
         )
-        env.InstallProgram(target)
-        
-        # http://www.scons.org/wiki/UnitTests
-        alias = env.Alias(testname, [target], target[0].abspath)
-        AlwaysBuild(alias)
+        env.UnitTest(testname, target)
 
 Export('llvmpipe')
index 3479c343261d64c07cad3e38a52d27a2fdff4863..fe4982aa9fd8c5fe5e997f0c2f3b61edcd39ac2a 100644 (file)
@@ -202,6 +202,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #define NVE4_COMPUTE_CLASS                                     0x0000a0c0
 #define NVF0_COMPUTE_CLASS                                     0x0000a1c0
 #define GM107_COMPUTE_CLASS                            0x0000b0c0
+#define GM200_COMPUTE_CLASS                            0x0000b1c0
 #define NV84_CRYPT_CLASS                                       0x000074c1
 #define BLOB_NVC0_PCOPY1_CLASS                                 0x000090b8
 #define BLOB_NVC0_PCOPY0_CLASS                                 0x000090b5
index 9a34007c6e513c58dfbb426578d5bca5d5faad91..3bf98ad6a3b548e58425106bdbc8acdb83d5c136 100644 (file)
@@ -644,9 +644,9 @@ nvc0_screen_init_compute(struct nvc0_screen *screen)
    case 0xf0:
    case 0x100:
    case 0x110:
+   case 0x120:
       if (debug_get_bool_option("NVF0_COMPUTE", false))
          return nve4_screen_compute_setup(screen, screen->base.pushbuf);
-   case 0x120:
       return 0;
    default:
       return -1;
index 4d069df983e06cc24226e00c11a20f45781553e8..3d0190928e3667c26ec43a36f92d0f21f9861327 100644 (file)
@@ -54,6 +54,9 @@ nve4_screen_compute_setup(struct nvc0_screen *screen,
    case 0x110:
       obj_class = GM107_COMPUTE_CLASS;
       break;
+   case 0x120:
+      obj_class = GM200_COMPUTE_CLASS;
+      break;
    default:
       NOUVEAU_ERR("unsupported chipset: NV%02x\n", dev->chipset);
       return -1;
index 47514e91d2340d60bd6050914a74a360a7492ae5..664dc5bfdce19756c9dab65e159ab52646a581c9 100644 (file)
@@ -376,6 +376,10 @@ static void *r600_buffer_transfer_map(struct pipe_context *ctx,
                                       0, 0, resource, level, box);
 
                        data = r600_buffer_map_sync_with_rings(rctx, staging, PIPE_TRANSFER_READ);
+                       if (!data) {
+                               pipe_resource_reference((struct pipe_resource **)&staging, NULL);
+                               return NULL;
+                       }
                        data += box->x % R600_MAP_BUFFER_ALIGNMENT;
 
                        return r600_buffer_get_transfer(ctx, resource, level, usage, box,
index b5557d800c76445afb6d4f156a5a62e1a20c8c28..7fc1461ec9b6d3ea78194cc87702741ea45d4a40 100644 (file)
@@ -1192,7 +1192,9 @@ static void si_mark_shader_pointers_dirty(struct si_context *sctx,
 {
        sctx->const_buffers[shader].desc.pointer_dirty = true;
        sctx->rw_buffers[shader].desc.pointer_dirty = true;
+       sctx->shader_buffers[shader].desc.pointer_dirty = true;
        sctx->samplers[shader].views.desc.pointer_dirty = true;
+       sctx->images[shader].desc.pointer_dirty = true;
 
        if (shader == PIPE_SHADER_VERTEX)
                sctx->vertex_buffers.pointer_dirty = true;
index c58467ddcb07dc8148cf91d5bb35525fb38dd485..c26960b1ca3d508fee59fb23bc0abb09b31d53a6 100644 (file)
@@ -5839,6 +5839,10 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
 
        radeon_llvm_dispose(&ctx.radeon_bld);
 
+       /* Add the scratch offset to input SGPRs. */
+       if (shader->config.scratch_bytes_per_wave)
+               shader->info.num_input_sgprs += 1; /* scratch byte offset */
+
        /* Calculate the number of fragment input VGPRs. */
        if (ctx.type == TGSI_PROCESSOR_FRAGMENT) {
                shader->info.num_input_vgprs = 0;
@@ -6761,6 +6765,13 @@ static bool si_shader_select_ps_parts(struct si_screen *sscreen,
        return true;
 }
 
+static void si_fix_num_sgprs(struct si_shader *shader)
+{
+       unsigned min_sgprs = shader->info.num_input_sgprs + 2; /* +2: two SGPRs reserved for VCC on top of the input SGPRs */
+
+       shader->config.num_sgprs = MAX2(shader->config.num_sgprs, min_sgprs); /* only ever raises num_sgprs, never lowers it */
+}
+
 int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
                     struct si_shader *shader,
                     struct pipe_debug_callback *debug)
@@ -6850,6 +6861,7 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
                }
        }
 
+       si_fix_num_sgprs(shader);
        si_shader_dump(sscreen, shader, debug, shader->selector->info.processor,
                       stderr);
 
index 82ae4c43245e3122ce8471f5043d7d542cd98d2d..af9ffdd381fd4b4cbcb4e536af7ada06f2a4a8fa 100644 (file)
@@ -1487,7 +1487,7 @@ static uint32_t si_translate_texformat(struct pipe_screen *screen,
        }
 
        if (desc->layout == UTIL_FORMAT_LAYOUT_ETC &&
-           sscreen->b.family >= CHIP_STONEY) {
+           sscreen->b.family == CHIP_STONEY) {
                switch (format) {
                case PIPE_FORMAT_ETC1_RGB8:
                case PIPE_FORMAT_ETC2_RGB8:
index b7ebb48e6a9a403c335b17a46eb1ec4616963e13..d560aae1f7dc78db2d323c4447c0a5f8e33bbbdf 100644 (file)
@@ -306,7 +306,6 @@ static void si_set_tesseval_regs(struct si_shader *shader,
 static void si_shader_ls(struct si_shader *shader)
 {
        struct si_pm4_state *pm4;
-       unsigned num_sgprs, num_user_sgprs;
        unsigned vgpr_comp_cnt;
        uint64_t va;
 
@@ -321,30 +320,21 @@ static void si_shader_ls(struct si_shader *shader)
         * VGPR0-3: (VertexID, RelAutoindex, ???, InstanceID). */
        vgpr_comp_cnt = shader->info.uses_instanceid ? 3 : 1;
 
-       num_user_sgprs = SI_LS_NUM_USER_SGPR;
-       num_sgprs = shader->config.num_sgprs;
-       if (num_user_sgprs > num_sgprs) {
-               /* Last 2 reserved SGPRs are used for VCC */
-               num_sgprs = num_user_sgprs + 2;
-       }
-       assert(num_sgprs <= 104);
-
        si_pm4_set_reg(pm4, R_00B520_SPI_SHADER_PGM_LO_LS, va >> 8);
        si_pm4_set_reg(pm4, R_00B524_SPI_SHADER_PGM_HI_LS, va >> 40);
 
        shader->config.rsrc1 = S_00B528_VGPRS((shader->config.num_vgprs - 1) / 4) |
-                          S_00B528_SGPRS((num_sgprs - 1) / 8) |
+                          S_00B528_SGPRS((shader->config.num_sgprs - 1) / 8) |
                           S_00B528_VGPR_COMP_CNT(vgpr_comp_cnt) |
                           S_00B528_DX10_CLAMP(1) |
                           S_00B528_FLOAT_MODE(shader->config.float_mode);
-       shader->config.rsrc2 = S_00B52C_USER_SGPR(num_user_sgprs) |
+       shader->config.rsrc2 = S_00B52C_USER_SGPR(SI_LS_NUM_USER_SGPR) |
                           S_00B52C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0);
 }
 
 static void si_shader_hs(struct si_shader *shader)
 {
        struct si_pm4_state *pm4;
-       unsigned num_sgprs, num_user_sgprs;
        uint64_t va;
 
        pm4 = shader->pm4 = CALLOC_STRUCT(si_pm4_state);
@@ -354,32 +344,22 @@ static void si_shader_hs(struct si_shader *shader)
        va = shader->bo->gpu_address;
        si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_USER_SHADER);
 
-       num_user_sgprs = SI_TCS_NUM_USER_SGPR;
-       num_sgprs = shader->config.num_sgprs;
-       /* One SGPR after user SGPRs is pre-loaded with tessellation factor
-        * buffer offset. */
-       if ((num_user_sgprs + 1) > num_sgprs) {
-               /* Last 2 reserved SGPRs are used for VCC */
-               num_sgprs = num_user_sgprs + 1 + 2;
-       }
-       assert(num_sgprs <= 104);
-
        si_pm4_set_reg(pm4, R_00B420_SPI_SHADER_PGM_LO_HS, va >> 8);
        si_pm4_set_reg(pm4, R_00B424_SPI_SHADER_PGM_HI_HS, va >> 40);
        si_pm4_set_reg(pm4, R_00B428_SPI_SHADER_PGM_RSRC1_HS,
                       S_00B428_VGPRS((shader->config.num_vgprs - 1) / 4) |
-                      S_00B428_SGPRS((num_sgprs - 1) / 8) |
+                      S_00B428_SGPRS((shader->config.num_sgprs - 1) / 8) |
                       S_00B428_DX10_CLAMP(1) |
                       S_00B428_FLOAT_MODE(shader->config.float_mode));
        si_pm4_set_reg(pm4, R_00B42C_SPI_SHADER_PGM_RSRC2_HS,
-                      S_00B42C_USER_SGPR(num_user_sgprs) |
+                      S_00B42C_USER_SGPR(SI_TCS_NUM_USER_SGPR) |
                       S_00B42C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0));
 }
 
 static void si_shader_es(struct si_shader *shader)
 {
        struct si_pm4_state *pm4;
-       unsigned num_sgprs, num_user_sgprs;
+       unsigned num_user_sgprs;
        unsigned vgpr_comp_cnt;
        uint64_t va;
 
@@ -400,21 +380,13 @@ static void si_shader_es(struct si_shader *shader)
        } else
                unreachable("invalid shader selector type");
 
-       num_sgprs = shader->config.num_sgprs;
-       /* One SGPR after user SGPRs is pre-loaded with es2gs_offset */
-       if ((num_user_sgprs + 1) > num_sgprs) {
-               /* Last 2 reserved SGPRs are used for VCC */
-               num_sgprs = num_user_sgprs + 1 + 2;
-       }
-       assert(num_sgprs <= 104);
-
        si_pm4_set_reg(pm4, R_028AAC_VGT_ESGS_RING_ITEMSIZE,
                       shader->selector->esgs_itemsize / 4);
        si_pm4_set_reg(pm4, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8);
        si_pm4_set_reg(pm4, R_00B324_SPI_SHADER_PGM_HI_ES, va >> 40);
        si_pm4_set_reg(pm4, R_00B328_SPI_SHADER_PGM_RSRC1_ES,
                       S_00B328_VGPRS((shader->config.num_vgprs - 1) / 4) |
-                      S_00B328_SGPRS((num_sgprs - 1) / 8) |
+                      S_00B328_SGPRS((shader->config.num_sgprs - 1) / 8) |
                       S_00B328_VGPR_COMP_CNT(vgpr_comp_cnt) |
                       S_00B328_DX10_CLAMP(1) |
                       S_00B328_FLOAT_MODE(shader->config.float_mode));
@@ -458,7 +430,6 @@ static void si_shader_gs(struct si_shader *shader)
        unsigned gsvs_itemsize = shader->selector->max_gsvs_emit_size >> 2;
        unsigned gs_num_invocations = shader->selector->gs_num_invocations;
        struct si_pm4_state *pm4;
-       unsigned num_sgprs, num_user_sgprs;
        uint64_t va;
        unsigned max_stream = shader->selector->max_gs_stream;
 
@@ -494,22 +465,13 @@ static void si_shader_gs(struct si_shader *shader)
        si_pm4_set_reg(pm4, R_00B220_SPI_SHADER_PGM_LO_GS, va >> 8);
        si_pm4_set_reg(pm4, R_00B224_SPI_SHADER_PGM_HI_GS, va >> 40);
 
-       num_user_sgprs = SI_GS_NUM_USER_SGPR;
-       num_sgprs = shader->config.num_sgprs;
-       /* Two SGPRs after user SGPRs are pre-loaded with gs2vs_offset, gs_wave_id */
-       if ((num_user_sgprs + 2) > num_sgprs) {
-               /* Last 2 reserved SGPRs are used for VCC */
-               num_sgprs = num_user_sgprs + 2 + 2;
-       }
-       assert(num_sgprs <= 104);
-
        si_pm4_set_reg(pm4, R_00B228_SPI_SHADER_PGM_RSRC1_GS,
                       S_00B228_VGPRS((shader->config.num_vgprs - 1) / 4) |
-                      S_00B228_SGPRS((num_sgprs - 1) / 8) |
+                      S_00B228_SGPRS((shader->config.num_sgprs - 1) / 8) |
                       S_00B228_DX10_CLAMP(1) |
                       S_00B228_FLOAT_MODE(shader->config.float_mode));
        si_pm4_set_reg(pm4, R_00B22C_SPI_SHADER_PGM_RSRC2_GS,
-                      S_00B22C_USER_SGPR(num_user_sgprs) |
+                      S_00B22C_USER_SGPR(SI_GS_NUM_USER_SGPR) |
                       S_00B22C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0));
 }
 
@@ -523,7 +485,7 @@ static void si_shader_gs(struct si_shader *shader)
 static void si_shader_vs(struct si_shader *shader, struct si_shader *gs)
 {
        struct si_pm4_state *pm4;
-       unsigned num_sgprs, num_user_sgprs;
+       unsigned num_user_sgprs;
        unsigned nparams, vgpr_comp_cnt;
        uint64_t va;
        unsigned window_space =
@@ -566,13 +528,6 @@ static void si_shader_vs(struct si_shader *shader, struct si_shader *gs)
        } else
                unreachable("invalid shader selector type");
 
-       num_sgprs = shader->config.num_sgprs;
-       if (num_user_sgprs > num_sgprs) {
-               /* Last 2 reserved SGPRs are used for VCC */
-               num_sgprs = num_user_sgprs + 2;
-       }
-       assert(num_sgprs <= 104);
-
        /* VS is required to export at least one param. */
        nparams = MAX2(shader->info.nr_param_exports, 1);
        si_pm4_set_reg(pm4, R_0286C4_SPI_VS_OUT_CONFIG,
@@ -594,7 +549,7 @@ static void si_shader_vs(struct si_shader *shader, struct si_shader *gs)
        si_pm4_set_reg(pm4, R_00B124_SPI_SHADER_PGM_HI_VS, va >> 40);
        si_pm4_set_reg(pm4, R_00B128_SPI_SHADER_PGM_RSRC1_VS,
                       S_00B128_VGPRS((shader->config.num_vgprs - 1) / 4) |
-                      S_00B128_SGPRS((num_sgprs - 1) / 8) |
+                      S_00B128_SGPRS((shader->config.num_sgprs - 1) / 8) |
                       S_00B128_VGPR_COMP_CNT(vgpr_comp_cnt) |
                       S_00B128_DX10_CLAMP(1) |
                       S_00B128_FLOAT_MODE(shader->config.float_mode));
@@ -684,7 +639,6 @@ static void si_shader_ps(struct si_shader *shader)
        struct tgsi_shader_info *info = &shader->selector->info;
        struct si_pm4_state *pm4;
        unsigned spi_ps_in_control, spi_shader_col_format, cb_shader_mask;
-       unsigned num_sgprs, num_user_sgprs;
        unsigned spi_baryc_cntl = S_0286E0_FRONT_FACE_ALL_BITS(1);
        uint64_t va;
        bool has_centroid;
@@ -771,23 +725,14 @@ static void si_shader_ps(struct si_shader *shader)
        si_pm4_set_reg(pm4, R_00B020_SPI_SHADER_PGM_LO_PS, va >> 8);
        si_pm4_set_reg(pm4, R_00B024_SPI_SHADER_PGM_HI_PS, va >> 40);
 
-       num_user_sgprs = SI_PS_NUM_USER_SGPR;
-       num_sgprs = shader->config.num_sgprs;
-       /* One SGPR after user SGPRs is pre-loaded with {prim_mask, lds_offset} */
-       if ((num_user_sgprs + 1) > num_sgprs) {
-               /* Last 2 reserved SGPRs are used for VCC */
-               num_sgprs = num_user_sgprs + 1 + 2;
-       }
-       assert(num_sgprs <= 104);
-
        si_pm4_set_reg(pm4, R_00B028_SPI_SHADER_PGM_RSRC1_PS,
                       S_00B028_VGPRS((shader->config.num_vgprs - 1) / 4) |
-                      S_00B028_SGPRS((num_sgprs - 1) / 8) |
+                      S_00B028_SGPRS((shader->config.num_sgprs - 1) / 8) |
                       S_00B028_DX10_CLAMP(1) |
                       S_00B028_FLOAT_MODE(shader->config.float_mode));
        si_pm4_set_reg(pm4, R_00B02C_SPI_SHADER_PGM_RSRC2_PS,
                       S_00B02C_EXTRA_LDS_SIZE(shader->config.lds_size) |
-                      S_00B02C_USER_SGPR(num_user_sgprs) |
+                      S_00B02C_USER_SGPR(SI_PS_NUM_USER_SGPR) |
                       S_00B32C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0));
 
        /* Prefer RE_Z if the shader is complex enough. The requirement is either:
index f08806aaf77d59bd7aebc98b6ad3bc1bfc03f018..d6d6e7dc611cb6292bde95e249030d70e9bee509 100644 (file)
@@ -28,4 +28,96 @@ noinst_LTLIBRARIES = libmesaswr.la
 
 libmesaswr_la_SOURCES = $(LOADER_SOURCES)
 
-EXTRA_DIST = Makefile.sources-arch
+COMMON_CXXFLAGS = \
+       $(GALLIUM_DRIVER_CFLAGS) \
+       $(LLVM_CFLAGS) \
+       -I$(builddir)/rasterizer/scripts \
+       -I$(builddir)/rasterizer/jitter \
+       -I$(srcdir)/rasterizer \
+       -I$(srcdir)/rasterizer/core \
+       -I$(srcdir)/rasterizer/jitter
+
+COMMON_SOURCES = \
+       $(CXX_SOURCES) \
+       $(COMMON_CXX_SOURCES) \
+       $(CORE_CXX_SOURCES) \
+       $(JITTER_CXX_SOURCES) \
+       $(MEMORY_CXX_SOURCES) \
+       $(BUILT_SOURCES)
+
+BUILT_SOURCES = \
+       rasterizer/scripts/gen_knobs.cpp \
+       rasterizer/scripts/gen_knobs.h \
+       rasterizer/jitter/state_llvm.h \
+       rasterizer/jitter/builder_gen.h \
+       rasterizer/jitter/builder_gen.cpp \
+       rasterizer/jitter/builder_x86.h \
+       rasterizer/jitter/builder_x86.cpp
+
+rasterizer/scripts/gen_knobs.cpp rasterizer/scripts/gen_knobs.h: rasterizer/scripts/gen_knobs.py rasterizer/scripts/knob_defs.py rasterizer/scripts/templates/knobs.template
+       $(PYTHON2) $(PYTHON_FLAGS) \
+               $(srcdir)/rasterizer/scripts/gen_knobs.py \
+               rasterizer/scripts
+
+rasterizer/jitter/state_llvm.h: rasterizer/jitter/scripts/gen_llvm_types.py rasterizer/core/state.h
+       $(PYTHON2) $(PYTHON_FLAGS) \
+               $(srcdir)/rasterizer/jitter/scripts/gen_llvm_types.py \
+               --input $(srcdir)/rasterizer/core/state.h \
+               --output rasterizer/jitter/state_llvm.h
+
+rasterizer/jitter/builder_gen.h: rasterizer/jitter/scripts/gen_llvm_ir_macros.py $(LLVM_INCLUDEDIR)/llvm/IR/IRBuilder.h
+       $(PYTHON2) $(PYTHON_FLAGS) \
+               $(srcdir)/rasterizer/jitter/scripts/gen_llvm_ir_macros.py \
+               --input $(LLVM_INCLUDEDIR)/llvm/IR/IRBuilder.h \
+               --output rasterizer/jitter/builder_gen.h \
+               --gen_h
+
+rasterizer/jitter/builder_gen.cpp: rasterizer/jitter/scripts/gen_llvm_ir_macros.py $(LLVM_INCLUDEDIR)/llvm/IR/IRBuilder.h
+       $(PYTHON2) $(PYTHON_FLAGS) \
+               $(srcdir)/rasterizer/jitter/scripts/gen_llvm_ir_macros.py \
+               --input $(LLVM_INCLUDEDIR)/llvm/IR/IRBuilder.h \
+               --output rasterizer/jitter/builder_gen.cpp \
+               --gen_cpp
+
+rasterizer/jitter/builder_x86.h: rasterizer/jitter/scripts/gen_llvm_ir_macros.py
+       $(PYTHON2) $(PYTHON_FLAGS) \
+               $(srcdir)/rasterizer/jitter/scripts/gen_llvm_ir_macros.py \
+               --output rasterizer/jitter/builder_x86.h \
+               --gen_x86_h
+
+rasterizer/jitter/builder_x86.cpp: rasterizer/jitter/scripts/gen_llvm_ir_macros.py
+       $(PYTHON2) $(PYTHON_FLAGS) \
+               $(srcdir)/rasterizer/jitter/scripts/gen_llvm_ir_macros.py \
+               --output rasterizer/jitter/builder_x86.cpp \
+               --gen_x86_cpp
+
+
+COMMON_LIBADD = \
+       $(top_builddir)/src/gallium/auxiliary/libgallium.la \
+       $(top_builddir)/src/mesa/libmesagallium.la
+
+lib_LTLIBRARIES = libswrAVX.la libswrAVX2.la
+
+libswrAVX_la_CXXFLAGS = \
+       -march=core-avx-i \
+       -DKNOB_ARCH=KNOB_ARCH_AVX \
+       $(COMMON_CXXFLAGS)
+
+libswrAVX_la_SOURCES = \
+       $(COMMON_SOURCES)
+
+libswrAVX_la_LIBADD = \
+       $(COMMON_LIBADD)
+
+libswrAVX2_la_CXXFLAGS = \
+       -march=core-avx2 \
+       -DKNOB_ARCH=KNOB_ARCH_AVX2 \
+       $(COMMON_CXXFLAGS)
+
+libswrAVX2_la_SOURCES = \
+       $(COMMON_SOURCES)
+
+libswrAVX2_la_LIBADD = \
+       $(COMMON_LIBADD)
+
+include $(top_srcdir)/install-gallium-links.mk
index 72247211184ea2ed68d71e4191fa7e2f3e11dcc4..be3d8f027f8bdbf5a8d63bddde0d2f2fe8b1a6ce 100644 (file)
 
 LOADER_SOURCES := \
        swr_loader.cpp
+
+CXX_SOURCES := \
+       swr_clear.cpp \
+       swr_context.cpp \
+       swr_context.h \
+       swr_context_llvm.h \
+       swr_draw.cpp \
+       swr_public.h \
+       swr_resource.h \
+       swr_screen.cpp \
+       swr_screen.h \
+       swr_state.cpp \
+       swr_state.h \
+       swr_tex_sample.cpp \
+       swr_tex_sample.h \
+       swr_scratch.h \
+       swr_scratch.cpp \
+       swr_shader.cpp \
+       swr_memory.h \
+       swr_fence.h \
+       swr_fence.cpp \
+       swr_query.h \
+       swr_query.cpp
+
+COMMON_CXX_SOURCES := \
+       rasterizer/common/containers.hpp \
+       rasterizer/common/formats.cpp \
+       rasterizer/common/formats.h \
+       rasterizer/common/isa.hpp \
+       rasterizer/common/os.h \
+       rasterizer/common/rdtsc_buckets.cpp \
+       rasterizer/common/rdtsc_buckets.h \
+       rasterizer/common/rdtsc_buckets_shared.h \
+       rasterizer/common/rdtsc_buckets_shared.h \
+       rasterizer/common/simdintrin.h \
+       rasterizer/common/swr_assert.cpp \
+       rasterizer/common/swr_assert.h
+
+CORE_CXX_SOURCES := \
+       rasterizer/core/api.cpp \
+       rasterizer/core/api.h \
+       rasterizer/core/arena.h \
+       rasterizer/core/backend.cpp \
+       rasterizer/core/backend.h \
+       rasterizer/core/blend.h \
+       rasterizer/core/clip.cpp \
+       rasterizer/core/clip.h \
+       rasterizer/core/context.h \
+       rasterizer/core/depthstencil.h \
+       rasterizer/core/fifo.hpp \
+       rasterizer/core/format_traits.h \
+       rasterizer/core/format_types.h \
+       rasterizer/core/frontend.cpp \
+       rasterizer/core/frontend.h \
+       rasterizer/core/knobs.h \
+       rasterizer/core/knobs_init.h \
+       rasterizer/core/multisample.cpp \
+       rasterizer/core/multisample.h \
+       rasterizer/core/pa_avx.cpp \
+       rasterizer/core/pa.h \
+       rasterizer/core/rasterizer.cpp \
+       rasterizer/core/rasterizer.h \
+       rasterizer/core/rdtsc_core.cpp \
+       rasterizer/core/rdtsc_core.h \
+       rasterizer/core/ringbuffer.h \
+       rasterizer/core/state.h \
+       rasterizer/core/threads.cpp \
+       rasterizer/core/threads.h \
+       rasterizer/core/tilemgr.cpp \
+       rasterizer/core/tilemgr.h \
+       rasterizer/core/utils.cpp \
+       rasterizer/core/utils.h
+
+JITTER_CXX_SOURCES := \
+       rasterizer/jitter/blend_jit.cpp \
+       rasterizer/jitter/blend_jit.h \
+       rasterizer/jitter/builder.cpp \
+       rasterizer/jitter/builder.h \
+       rasterizer/jitter/builder_misc.cpp \
+       rasterizer/jitter/builder_misc.h \
+       rasterizer/jitter/fetch_jit.cpp \
+       rasterizer/jitter/fetch_jit.h \
+       rasterizer/jitter/JitManager.cpp \
+       rasterizer/jitter/JitManager.h \
+       rasterizer/jitter/streamout_jit.cpp \
+       rasterizer/jitter/streamout_jit.h
+
+MEMORY_CXX_SOURCES := \
+       rasterizer/memory/ClearTile.cpp \
+       rasterizer/memory/LoadTile.cpp \
+       rasterizer/memory/StoreTile.cpp
diff --git a/src/gallium/drivers/swr/Makefile.sources-arch b/src/gallium/drivers/swr/Makefile.sources-arch
deleted file mode 100644 (file)
index a04b120..0000000
+++ /dev/null
@@ -1,111 +0,0 @@
-# Copyright (C) 2015 Intel Corporation.   All Rights Reserved.
-#
-# Permission is hereby granted, free of charge, to any person obtaining a
-# copy of this software and associated documentation files (the "Software"),
-# to deal in the Software without restriction, including without limitation
-# the rights to use, copy, modify, merge, publish, distribute, sublicense,
-# and/or sell copies of the Software, and to permit persons to whom the
-# Software is furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice (including the next
-# paragraph) shall be included in all copies or substantial portions of the
-# Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-# IN THE SOFTWARE.
-
-CXX_SOURCES := \
-       swr_clear.cpp \
-       swr_context.cpp \
-       swr_context.h \
-       swr_context_llvm.h \
-       swr_draw.cpp \
-       swr_public.h \
-       swr_resource.h \
-       swr_screen.cpp \
-       swr_screen.h \
-       swr_state.cpp \
-       swr_state.h \
-       swr_tex_sample.cpp \
-       swr_tex_sample.h \
-       swr_scratch.h \
-       swr_scratch.cpp \
-       swr_shader.cpp \
-       swr_memory.h \
-       swr_fence.h \
-       swr_fence.cpp \
-       swr_query.h \
-       swr_query.cpp
-
-COMMON_CXX_SOURCES := \
-       rasterizer/common/containers.hpp \
-       rasterizer/common/formats.cpp \
-       rasterizer/common/formats.h \
-       rasterizer/common/isa.hpp \
-       rasterizer/common/os.h \
-       rasterizer/common/rdtsc_buckets.cpp \
-       rasterizer/common/rdtsc_buckets.h \
-       rasterizer/common/rdtsc_buckets_shared.h \
-       rasterizer/common/rdtsc_buckets_shared.h \
-       rasterizer/common/simdintrin.h \
-       rasterizer/common/swr_assert.cpp \
-       rasterizer/common/swr_assert.h
-
-CORE_CXX_SOURCES := \
-       rasterizer/core/api.cpp \
-       rasterizer/core/api.h \
-       rasterizer/core/arena.h \
-       rasterizer/core/backend.cpp \
-       rasterizer/core/backend.h \
-       rasterizer/core/blend.h \
-       rasterizer/core/clip.cpp \
-       rasterizer/core/clip.h \
-       rasterizer/core/context.h \
-       rasterizer/core/depthstencil.h \
-       rasterizer/core/fifo.hpp \
-       rasterizer/core/format_traits.h \
-       rasterizer/core/format_types.h \
-       rasterizer/core/frontend.cpp \
-       rasterizer/core/frontend.h \
-       rasterizer/core/knobs.h \
-       rasterizer/core/knobs_init.h \
-       rasterizer/core/multisample.cpp \
-       rasterizer/core/multisample.h \
-       rasterizer/core/pa_avx.cpp \
-       rasterizer/core/pa.h \
-       rasterizer/core/rasterizer.cpp \
-       rasterizer/core/rasterizer.h \
-       rasterizer/core/rdtsc_core.cpp \
-       rasterizer/core/rdtsc_core.h \
-       rasterizer/core/ringbuffer.h \
-       rasterizer/core/state.h \
-       rasterizer/core/threads.cpp \
-       rasterizer/core/threads.h \
-       rasterizer/core/tilemgr.cpp \
-       rasterizer/core/tilemgr.h \
-       rasterizer/core/utils.cpp \
-       rasterizer/core/utils.h
-
-JITTER_CXX_SOURCES := \
-       rasterizer/jitter/blend_jit.cpp \
-       rasterizer/jitter/blend_jit.h \
-       rasterizer/jitter/builder.cpp \
-       rasterizer/jitter/builder.h \
-       rasterizer/jitter/builder_misc.cpp \
-       rasterizer/jitter/builder_misc.h \
-       rasterizer/jitter/fetch_jit.cpp \
-       rasterizer/jitter/fetch_jit.h \
-       rasterizer/jitter/JitManager.cpp \
-       rasterizer/jitter/JitManager.h \
-       rasterizer/jitter/streamout_jit.cpp \
-       rasterizer/jitter/streamout_jit.h
-
-MEMORY_CXX_SOURCES := \
-       rasterizer/memory/ClearTile.cpp \
-       rasterizer/memory/LoadTile.cpp \
-       rasterizer/memory/StoreTile.cpp
diff --git a/src/gallium/drivers/swr/avx/Makefile.am b/src/gallium/drivers/swr/avx/Makefile.am
deleted file mode 100644 (file)
index 384f1a7..0000000
+++ /dev/null
@@ -1,99 +0,0 @@
-# Copyright (C) 2015 Intel Corporation.   All Rights Reserved.
-#
-# Permission is hereby granted, free of charge, to any person obtaining a
-# copy of this software and associated documentation files (the "Software"),
-# to deal in the Software without restriction, including without limitation
-# the rights to use, copy, modify, merge, publish, distribute, sublicense,
-# and/or sell copies of the Software, and to permit persons to whom the
-# Software is furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice (including the next
-# paragraph) shall be included in all copies or substantial portions of the
-# Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-# IN THE SOFTWARE.
-
-include ../Makefile.sources-arch
-include $(top_srcdir)/src/gallium/Automake.inc
-
-VPATH = $(srcdir) $(srcdir)/..
-
-AM_CXXFLAGS = \
-       -march=core-avx-i \
-       -DKNOB_ARCH=KNOB_ARCH_AVX \
-       $(GALLIUM_DRIVER_CFLAGS) \
-       $(LLVM_CFLAGS) \
-       -I$(builddir)/rasterizer/scripts \
-       -I$(builddir)/rasterizer/jitter \
-       -I$(srcdir)/../rasterizer \
-       -I$(srcdir)/../rasterizer/core \
-       -I$(srcdir)/../rasterizer/jitter
-
-lib_LTLIBRARIES = libswrAVX.la
-
-BUILT_SOURCES = \
-       rasterizer/scripts/gen_knobs.cpp \
-       rasterizer/scripts/gen_knobs.h \
-       rasterizer/jitter/state_llvm.h \
-       rasterizer/jitter/builder_gen.h \
-       rasterizer/jitter/builder_gen.cpp \
-       rasterizer/jitter/builder_x86.h \
-       rasterizer/jitter/builder_x86.cpp
-
-libswrAVX_la_SOURCES = \
-       $(CXX_SOURCES) \
-       $(COMMON_CXX_SOURCES) \
-       $(CORE_CXX_SOURCES) \
-       $(JITTER_CXX_SOURCES) \
-       $(MEMORY_CXX_SOURCES) \
-       $(BUILT_SOURCES)
-
-rasterizer/scripts/gen_knobs.cpp rasterizer/scripts/gen_knobs.h: rasterizer/scripts/gen_knobs.py rasterizer/scripts/knob_defs.py rasterizer/scripts/templates/knobs.template
-       $(PYTHON2) $(PYTHON_FLAGS) \
-               $(srcdir)/../rasterizer/scripts/gen_knobs.py \
-               rasterizer/scripts
-
-rasterizer/jitter/state_llvm.h: rasterizer/jitter/scripts/gen_llvm_types.py rasterizer/core/state.h
-       $(PYTHON2) $(PYTHON_FLAGS) \
-               $(srcdir)/../rasterizer/jitter/scripts/gen_llvm_types.py \
-               --input $(srcdir)/../rasterizer/core/state.h \
-               --output rasterizer/jitter/state_llvm.h
-
-rasterizer/jitter/builder_gen.h: rasterizer/jitter/scripts/gen_llvm_ir_macros.py $(LLVM_INCLUDEDIR)/llvm/IR/IRBuilder.h
-       $(PYTHON2) $(PYTHON_FLAGS) \
-               $(srcdir)/../rasterizer/jitter/scripts/gen_llvm_ir_macros.py \
-               --input $(LLVM_INCLUDEDIR)/llvm/IR/IRBuilder.h \
-               --output rasterizer/jitter/builder_gen.h \
-               --gen_h
-
-rasterizer/jitter/builder_gen.cpp: rasterizer/jitter/scripts/gen_llvm_ir_macros.py $(LLVM_INCLUDEDIR)/llvm/IR/IRBuilder.h
-       $(PYTHON2) $(PYTHON_FLAGS) \
-               $(srcdir)/../rasterizer/jitter/scripts/gen_llvm_ir_macros.py \
-               --input $(LLVM_INCLUDEDIR)/llvm/IR/IRBuilder.h \
-               --output rasterizer/jitter/builder_gen.cpp \
-               --gen_cpp
-
-rasterizer/jitter/builder_x86.h: rasterizer/jitter/scripts/gen_llvm_ir_macros.py
-       $(PYTHON2) $(PYTHON_FLAGS) \
-               $(srcdir)/../rasterizer/jitter/scripts/gen_llvm_ir_macros.py \
-               --output rasterizer/jitter/builder_x86.h \
-               --gen_x86_h
-
-rasterizer/jitter/builder_x86.cpp: rasterizer/jitter/scripts/gen_llvm_ir_macros.py
-       $(PYTHON2) $(PYTHON_FLAGS) \
-               $(srcdir)/../rasterizer/jitter/scripts/gen_llvm_ir_macros.py \
-               --output rasterizer/jitter/builder_x86.cpp \
-               --gen_x86_cpp
-
-
-libswrAVX_la_LIBADD = \
-       $(top_builddir)/src/gallium/auxiliary/libgallium.la \
-       $(top_builddir)/src/mesa/libmesagallium.la
-
-include $(top_srcdir)/install-gallium-links.mk
diff --git a/src/gallium/drivers/swr/avx2/Makefile.am b/src/gallium/drivers/swr/avx2/Makefile.am
deleted file mode 100644 (file)
index a3968ec..0000000
+++ /dev/null
@@ -1,99 +0,0 @@
-# Copyright (C) 2015 Intel Corporation.   All Rights Reserved.
-#
-# Permission is hereby granted, free of charge, to any person obtaining a
-# copy of this software and associated documentation files (the "Software"),
-# to deal in the Software without restriction, including without limitation
-# the rights to use, copy, modify, merge, publish, distribute, sublicense,
-# and/or sell copies of the Software, and to permit persons to whom the
-# Software is furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice (including the next
-# paragraph) shall be included in all copies or substantial portions of the
-# Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-# IN THE SOFTWARE.
-
-include ../Makefile.sources-arch
-include $(top_srcdir)/src/gallium/Automake.inc
-
-VPATH = $(srcdir) $(srcdir)/..
-
-AM_CXXFLAGS = \
-       -march=core-avx2 \
-       -DKNOB_ARCH=KNOB_ARCH_AVX2 \
-       $(GALLIUM_DRIVER_CFLAGS) \
-       $(LLVM_CFLAGS) \
-       -I$(builddir)/rasterizer/scripts \
-       -I$(builddir)/rasterizer/jitter \
-       -I$(srcdir)/../rasterizer \
-       -I$(srcdir)/../rasterizer/core \
-       -I$(srcdir)/../rasterizer/jitter
-
-lib_LTLIBRARIES = libswrAVX2.la
-
-BUILT_SOURCES = \
-       rasterizer/scripts/gen_knobs.cpp \
-       rasterizer/scripts/gen_knobs.h \
-       rasterizer/jitter/state_llvm.h \
-       rasterizer/jitter/builder_gen.h \
-       rasterizer/jitter/builder_gen.cpp \
-       rasterizer/jitter/builder_x86.h \
-       rasterizer/jitter/builder_x86.cpp
-
-libswrAVX2_la_SOURCES = \
-       $(CXX_SOURCES) \
-       $(COMMON_CXX_SOURCES) \
-       $(CORE_CXX_SOURCES) \
-       $(JITTER_CXX_SOURCES) \
-       $(MEMORY_CXX_SOURCES) \
-       $(BUILT_SOURCES)
-
-rasterizer/scripts/gen_knobs.cpp rasterizer/scripts/gen_knobs.h: rasterizer/scripts/gen_knobs.py rasterizer/scripts/knob_defs.py rasterizer/scripts/templates/knobs.template
-       $(PYTHON2) $(PYTHON_FLAGS) \
-               $(srcdir)/../rasterizer/scripts/gen_knobs.py \
-               rasterizer/scripts
-
-rasterizer/jitter/state_llvm.h: rasterizer/jitter/scripts/gen_llvm_types.py rasterizer/core/state.h
-       $(PYTHON2) $(PYTHON_FLAGS) \
-               $(srcdir)/../rasterizer/jitter/scripts/gen_llvm_types.py \
-               --input $(srcdir)/../rasterizer/core/state.h \
-               --output rasterizer/jitter/state_llvm.h
-
-rasterizer/jitter/builder_gen.h: rasterizer/jitter/scripts/gen_llvm_ir_macros.py $(LLVM_INCLUDEDIR)/llvm/IR/IRBuilder.h
-       $(PYTHON2) $(PYTHON_FLAGS) \
-               $(srcdir)/../rasterizer/jitter/scripts/gen_llvm_ir_macros.py \
-               --input $(LLVM_INCLUDEDIR)/llvm/IR/IRBuilder.h \
-               --output rasterizer/jitter/builder_gen.h \
-               --gen_h
-
-rasterizer/jitter/builder_gen.cpp: rasterizer/jitter/scripts/gen_llvm_ir_macros.py $(LLVM_INCLUDEDIR)/llvm/IR/IRBuilder.h
-       $(PYTHON2) $(PYTHON_FLAGS) \
-               $(srcdir)/../rasterizer/jitter/scripts/gen_llvm_ir_macros.py \
-               --input $(LLVM_INCLUDEDIR)/llvm/IR/IRBuilder.h \
-               --output rasterizer/jitter/builder_gen.cpp \
-               --gen_cpp
-
-rasterizer/jitter/builder_x86.h: rasterizer/jitter/scripts/gen_llvm_ir_macros.py
-       $(PYTHON2) $(PYTHON_FLAGS) \
-               $(srcdir)/../rasterizer/jitter/scripts/gen_llvm_ir_macros.py \
-               --output rasterizer/jitter/builder_x86.h \
-               --gen_x86_h
-
-rasterizer/jitter/builder_x86.cpp: rasterizer/jitter/scripts/gen_llvm_ir_macros.py
-       $(PYTHON2) $(PYTHON_FLAGS) \
-               $(srcdir)/../rasterizer/jitter/scripts/gen_llvm_ir_macros.py \
-               --output rasterizer/jitter/builder_x86.cpp \
-               --gen_x86_cpp
-
-
-libswrAVX2_la_LIBADD = \
-       $(top_builddir)/src/gallium/auxiliary/libgallium.la \
-       $(top_builddir)/src/mesa/libmesagallium.la
-
-include $(top_srcdir)/install-gallium-links.mk
index a816e2fea41fd5d4964f594817b5929b61303cdd..0650804018bedff1a4a10d010f0f27fc463415bb 100644 (file)
@@ -24,10 +24,8 @@ for progname in progs:
         target = progname,
         source = progname + '.c',
     )
-    
-    env.Alias(progname, env.InstallProgram(prog))
-
-    # http://www.scons.org/wiki/UnitTests
-    test_alias = env.Alias('unit', [prog], prog[0].abspath)
-    AlwaysBuild(test_alias)
-
+    if progname not in [
+        'u_cache_test', # too long
+        'translate_test', # unreliable
+    ]:
+       env.UnitTest(progname, prog)
index bab6acadb801c507cc584226d6fce50d2ee82db6..bb7989a79fd48dc756044368551ce1b81eaed0d8 100644 (file)
 
 
 #include <stdio.h>
+#include <stdlib.h>
 
 #include "os/os_thread.h"
 #include "os/os_time.h"
+#include "util/u_atomic.h"
 
 
 #define NUM_THREADS 10
 
+static int verbosity = 0;
+
 static pipe_thread threads[NUM_THREADS];
 static pipe_barrier barrier;
 static int thread_ids[NUM_THREADS];
 
+static volatile int waiting = 0;
+static volatile int proceeded = 0;
+
+
+#define LOG(fmt, ...) \
+   if (verbosity > 0) { \
+      fprintf(stdout, fmt, ##__VA_ARGS__); \
+   }
+
+#define CHECK(_cond) \
+   if (!(_cond)) { \
+      fprintf(stderr, "%s:%u: `%s` failed\n", __FILE__, __LINE__, #_cond); \
+      _exit(EXIT_FAILURE); \
+   }
+
 
 static PIPE_THREAD_ROUTINE(thread_function, thread_data)
 {
    int thread_id = *((int *) thread_data);
 
-   printf("thread %d starting\n", thread_id);
-   os_time_sleep(thread_id * 1000 * 1000);
-   printf("thread %d before barrier\n", thread_id);
+   LOG("thread %d starting\n", thread_id);
+   os_time_sleep(thread_id * 100 * 1000);
+   LOG("thread %d before barrier\n", thread_id);
+
+   CHECK(p_atomic_read(&proceeded) == 0);
+   p_atomic_inc(&waiting);
+
    pipe_barrier_wait(&barrier);
-   printf("thread %d exiting\n", thread_id);
+
+   CHECK(p_atomic_read(&waiting) == NUM_THREADS);
+
+   p_atomic_inc(&proceeded);
+
+   LOG("thread %d exiting\n", thread_id);
 
    return 0;
 }
 
 
-int main()
+int main(int argc, char *argv[])
 {
    int i;
 
-   printf("pipe_barrier_test starting\n");
+   for (i = 1; i < argc; ++i) {
+      const char *arg = argv[i];
+      if (strcmp(arg, "-v") == 0) {
+         ++verbosity;
+      } else {
+         fprintf(stderr, "error: unrecognized option `%s`\n", arg);
+         exit(EXIT_FAILURE);
+      }
+   }
+
+   // Disable buffering
+   setbuf(stdout, NULL);
+
+   LOG("pipe_barrier_test starting\n");
 
    pipe_barrier_init(&barrier, NUM_THREADS);
 
@@ -78,9 +119,11 @@ int main()
       pipe_thread_wait(threads[i]);
    }
 
+   CHECK(p_atomic_read(&proceeded) == NUM_THREADS);
+
    pipe_barrier_destroy(&barrier);
 
-   printf("pipe_barrier_test exiting\n");
+   LOG("pipe_barrier_test exiting\n");
 
    return 0;
 }
index fc49862c2caf390b95d97061a451b85cb87e68a8..7ca606750a964099f4dc45dc777bb740b9a9dbab 100644 (file)
@@ -70,8 +70,9 @@ int main(int argc, char** argv)
 
    util_cpu_detect();
 
-   if(argc <= 1)
-   {}
+   if (argc <= 1 ||
+       !strcmp(argv[1], "default") )
+      create_fn = translate_create;
    else if (!strcmp(argv[1], "generic"))
       create_fn = translate_generic_create;
    else if (!strcmp(argv[1], "x86"))
@@ -129,7 +130,7 @@ int main(int argc, char** argv)
 
    if (!create_fn)
    {
-      printf("Usage: ./translate_test [generic|x86|nosse|sse|sse2|sse3|sse4.1]\n");
+      printf("Usage: ./translate_test [default|generic|x86|nosse|sse|sse2|sse3|sse4.1]\n");
       return 2;
    }
 
index 3f307f4ef70cb38b70b44ac6d4c80a1f5a4cf3f7..aedb5a23f02ab3d2603eaaa05e602ff21bf6eb7c 100644 (file)
@@ -1988,10 +1988,11 @@ fs_visitor::assign_constant_locations()
     */
    const unsigned int max_push_components = 16 * 8;
 
-   /* For vulkan we don't limit the max_chunk_size. We set it to 32 float =
-    * 128 bytes, which is the maximum vulkan push constant size.
+   /* We push small arrays, but no bigger than 16 floats.  This is big enough
+    * for a vec4 but hopefully not large enough to push out other stuff.  We
+    * should probably use a better heuristic at some point.
     */
-   const unsigned int max_chunk_size = 32;
+   const unsigned int max_chunk_size = 16;
 
    unsigned int num_push_constants = 0;
    unsigned int num_pull_constants = 0;
@@ -2018,8 +2019,14 @@ fs_visitor::assign_constant_locations()
       if (!contiguous[u]) {
          unsigned chunk_size = u - chunk_start + 1;
 
-         if (num_push_constants + chunk_size <= max_push_components &&
-             chunk_size <= max_chunk_size) {
+         /* Decide whether we should push or pull this parameter.  In the
+          * Vulkan driver, push constants are explicitly exposed via the API
+          * so we push everything.  In GL, we only push small arrays.
+          */
+         if (stage_prog_data->pull_param == NULL ||
+             (num_push_constants + chunk_size <= max_push_components &&
+              chunk_size <= max_chunk_size)) {
+            assert(num_push_constants + chunk_size <= max_push_components);
             for (unsigned j = chunk_start; j <= u; j++)
                push_constant_loc[j] = num_push_constants++;
          } else {
@@ -4515,7 +4522,7 @@ get_lowered_simd_width(const struct brw_device_info *devinfo,
 
    case SHADER_OPCODE_MOV_INDIRECT:
       /* Prior to Broadwell, we only have 8 address subregisters */
-      return devinfo->gen < 8 ? 8 : inst->exec_size;
+      return devinfo->gen < 8 ? 8 : MIN2(inst->exec_size, 16);
 
    default:
       return inst->exec_size;
index ae80832544b08d6e578617c726ae7c329bef7234..851cccf0f7c5437a0f4b61a94330ac4ea956bbd8 100644 (file)
@@ -367,29 +367,53 @@ fs_generator::generate_mov_indirect(fs_inst *inst,
       /* The destination stride of an instruction (in bytes) must be greater
        * than or equal to the size of the rest of the instruction.  Since the
        * address register is of type UW, we can't use a D-type instruction.
-       * In order to get around this, re re-type to UW and use a stride.
+       * In order to get around this, we retype to UW and use a stride.
        */
       indirect_byte_offset =
          retype(spread(indirect_byte_offset, 2), BRW_REGISTER_TYPE_UW);
 
+      struct brw_reg ind_src;
       if (devinfo->gen < 8) {
-         /* Prior to broadwell, we have a restriction that the bottom 5 bits
-          * of the base offset and the bottom 5 bits of the indirect must add
-          * to less than 32.  In other words, the hardware needs to be able to
-          * add the bottom five bits of the two to get the subnumber and add
-          * the next 7 bits of each to get the actual register number.  Since
-          * the indirect may cause us to cross a register boundary, this makes
-          * it almost useless.  We could try and do something clever where we
-          * use a actual base offset if base_offset % 32 == 0 but that would
-          * mean we were generating different code depending on the base
-          * offset.  Instead, for the sake of consistency, we'll just do the
-          * add ourselves.
+         /* From the Haswell PRM section "Register Region Restrictions":
+          *
+          *    "The lower bits of the AddressImmediate must not overflow to
+          *    change the register address.  The lower 5 bits of Address
+          *    Immediate when added to lower 5 bits of address register gives
+          *    the sub-register offset. The upper bits of Address Immediate
+          *    when added to upper bits of address register gives the register
+          *    address. Any overflow from sub-register offset is dropped."
+          *
+          * This restriction is only listed in the Haswell PRM but empirical
+          * testing indicates that it applies on all older generations and is
+          * lifted on Broadwell.
+          *
+          * Since the indirect may cause us to cross a register boundary, this
+          * makes the base offset almost useless.  We could try and do
+          * something clever where we use an actual base offset if
+          * base_offset % 32 == 0 but that would mean we were generating
+          * different code depending on the base offset.  Instead, for the
+          * sake of consistency, we'll just do the add ourselves.
           */
          brw_ADD(p, addr, indirect_byte_offset, brw_imm_uw(imm_byte_offset));
-         brw_MOV(p, dst, retype(brw_VxH_indirect(0, 0), dst.type));
+         ind_src = brw_VxH_indirect(0, 0);
       } else {
          brw_MOV(p, addr, indirect_byte_offset);
-         brw_MOV(p, dst, retype(brw_VxH_indirect(0, imm_byte_offset), dst.type));
+         ind_src = brw_VxH_indirect(0, imm_byte_offset);
+      }
+
+      brw_inst *mov = brw_MOV(p, dst, retype(ind_src, dst.type));
+
+      if (devinfo->gen == 6 && dst.file == BRW_MESSAGE_REGISTER_FILE &&
+          !inst->get_next()->is_tail_sentinel() &&
+          ((fs_inst *)inst->get_next())->mlen > 0) {
+         /* From the Sandybridge PRM:
+          *
+          *    "[Errata: DevSNB(SNB)] If MRF register is updated by any
+          *    instruction that “indexed/indirect” source AND is followed by a
+          *    send, the instruction requires a “Switch”. This is to avoid
+          *    race condition where send may dispatch before MRF is updated."
+          */
+         brw_inst_set_thread_control(devinfo, mov, BRW_THREAD_SWITCH);
       }
    }
 }
index ab564bbcb9eee596f31da32dbacc5ed44da10e99..c16f1ed5477b1370bf9377f0f64104e82626f6ae 100644 (file)
@@ -2743,7 +2743,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
       nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]);
       if (const_offset == NULL) {
          fs_reg base_offset = retype(get_nir_src(instr->src[1]),
-                                     BRW_REGISTER_TYPE_D);
+                                     BRW_REGISTER_TYPE_UD);
 
          for (int i = 0; i < instr->num_components; i++)
             VARYING_PULL_CONSTANT_LOAD(bld, offset(dest, bld, i), surf_index,
index 6143f65efa139b7d52c4c28369ed33ff6ce0b8ba..11db159109fbcdf7be4db6a92f42e3664782171d 100644 (file)
@@ -285,7 +285,7 @@ public:
    void emit_pull_constant_load(bblock_t *block, vec4_instruction *inst,
                                dst_reg dst,
                                src_reg orig_src,
-                               int base_offset,
+                                int base_offset,
                                 src_reg indirect);
    void emit_pull_constant_load_reg(dst_reg dst,
                                     src_reg surf_index,
index 33c5f07cec9aea10b5e890e47d6fc039ff64658c..4b12a72910e1df2b70c92044fbdb14fbb1c2735d 100644 (file)
@@ -758,7 +758,7 @@ vec4_visitor::emit_pull_constant_load_reg(dst_reg dst,
       pull->mlen = 2;
       pull->header_size = 1;
    } else if (devinfo->gen >= 7) {
-      dst_reg grf_offset = dst_reg(this, glsl_type::int_type);
+      dst_reg grf_offset = dst_reg(this, glsl_type::uint_type);
 
       grf_offset.type = offset_reg.type;
 
@@ -1587,21 +1587,21 @@ vec4_visitor::move_grf_array_access_to_scratch()
 void
 vec4_visitor::emit_pull_constant_load(bblock_t *block, vec4_instruction *inst,
                                      dst_reg temp, src_reg orig_src,
-                                     int base_offset, src_reg indirect)
+                                      int base_offset, src_reg indirect)
 {
    int reg_offset = base_offset + orig_src.reg_offset;
    const unsigned index = prog_data->base.binding_table.pull_constants_start;
 
    src_reg offset;
    if (indirect.file != BAD_FILE) {
-      offset = src_reg(this, glsl_type::int_type);
+      offset = src_reg(this, glsl_type::uint_type);
 
       emit_before(block, inst, ADD(dst_reg(offset), indirect,
-                                   brw_imm_d(reg_offset * 16)));
+                                   brw_imm_ud(reg_offset * 16)));
    } else if (devinfo->gen >= 8) {
       /* Store the offset in a GRF so we can send-from-GRF. */
-      offset = src_reg(this, glsl_type::int_type);
-      emit_before(block, inst, MOV(dst_reg(offset), brw_imm_d(reg_offset * 16)));
+      offset = src_reg(this, glsl_type::uint_type);
+      emit_before(block, inst, MOV(dst_reg(offset), brw_imm_ud(reg_offset * 16)));
    } else {
       offset = brw_imm_d(reg_offset * 16);
    }
@@ -1629,6 +1629,12 @@ vec4_visitor::emit_pull_constant_load(bblock_t *block, vec4_instruction *inst,
 void
 vec4_visitor::move_uniform_array_access_to_pull_constants()
 {
+   /* The Vulkan driver doesn't support pull constants other than UBOs so
+    * everything has to be pushed regardless.
+    */
+   if (stage_prog_data->pull_param == NULL)
+      return;
+
    int pull_constant_loc[this->uniforms];
    memset(pull_constant_loc, -1, sizeof(pull_constant_loc));
 
index dbec82fbd441b76776d8e8ab1442a22cb9b5c517..db9d94d3b34cc97a362131ca751f49b0e32b3ded 100644 (file)
@@ -932,7 +932,7 @@ static const __DRIextension *intelRobustScreenExtensions[] = {
     NULL
 };
 
-static bool
+static int
 intel_get_param(__DRIscreen *psp, int param, int *value)
 {
    int ret;
@@ -943,20 +943,17 @@ intel_get_param(__DRIscreen *psp, int param, int *value)
    gp.value = value;
 
    ret = drmCommandWriteRead(psp->fd, DRM_I915_GETPARAM, &gp, sizeof(gp));
-   if (ret) {
-      if (ret != -EINVAL)
+   if (ret < 0 && ret != -EINVAL)
         _mesa_warning(NULL, "drm_i915_getparam: %d", ret);
-      return false;
-   }
 
-   return true;
+   return ret;
 }
 
 static bool
 intel_get_boolean(__DRIscreen *psp, int param)
 {
    int value = 0;
-   return intel_get_param(psp, param, &value) && value;
+   return (intel_get_param(psp, param, &value) == 0) && value;
 }
 
 static void
@@ -1093,12 +1090,12 @@ intel_detect_sseu(struct intel_screen *intelScreen)
 
    ret = intel_get_param(intelScreen->driScrnPriv, I915_PARAM_SUBSLICE_TOTAL,
                          &intelScreen->subslice_total);
-   if (ret != -EINVAL)
+   if (ret < 0 && ret != -EINVAL)
       goto err_out;
 
    ret = intel_get_param(intelScreen->driScrnPriv,
                          I915_PARAM_EU_TOTAL, &intelScreen->eu_total);
-   if (ret != -EINVAL)
+   if (ret < 0 && ret != -EINVAL)
       goto err_out;
 
    /* Without this information, we cannot get the right Braswell brandstrings,
@@ -1114,7 +1111,7 @@ intel_detect_sseu(struct intel_screen *intelScreen)
 err_out:
    intelScreen->subslice_total = -1;
    intelScreen->eu_total = -1;
-   _mesa_warning(NULL, "Failed to query GPU properties.\n");
+   _mesa_warning(NULL, "Failed to query GPU properties (%s).\n", strerror(-ret));
 }
 
 static bool
index 4fd2dfef8cc3c73c56e3e0cd50bf807d00be639a..b4d04b4de5fcc17d3e0a539049b4dd9d3a4d0950 100644 (file)
@@ -704,6 +704,10 @@ st_DrawAtlasBitmaps(struct gl_context *ctx,
    st_validate_state(st, ST_PIPELINE_RENDER);
 
    sv = st_create_texture_sampler_view(pipe, stObj->pt);
+   if (!sv) {
+      _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCallLists(bitmap text)");
+      return;
+   }
 
    setup_render_state(ctx, sv, color, true);
 
@@ -793,6 +797,8 @@ st_DrawAtlasBitmaps(struct gl_context *ctx,
 
    pipe_resource_reference(&vb.buffer, NULL);
 
+   pipe_sampler_view_reference(&sv, NULL);
+
    /* We uploaded modified constants, need to invalidate them. */
    st->dirty.mesa |= _NEW_PROGRAM_CONSTANTS;
 }
index 5f3ecc1cdfce99714f0bbd629c94938f356191e1..73f34303976a0943455bdc49e10c97b4d84e1e4a 100644 (file)
@@ -47,17 +47,14 @@ env.Alias('mesautil', mesautil)
 Export('mesautil')
 
 
-# http://www.scons.org/wiki/UnitTests
 u_atomic_test = env.Program(
     target = 'u_atomic_test',
     source = ['u_atomic_test.c'],
 )
-alias = env.Alias("u_atomic_test", u_atomic_test, u_atomic_test[0].abspath)
-AlwaysBuild(alias)
+env.UnitTest("u_atomic_test", u_atomic_test)
 
 roundeven_test = env.Program(
     target = 'roundeven_test',
     source = ['roundeven_test.c'],
 )
-alias = env.Alias("roundeven_test", roundeven_test, roundeven_test[0].abspath)
-AlwaysBuild(alias)
+env.UnitTest("roundeven_test", roundeven_test)