#print env.Dump()
+# Add a check target for running tests.  The alias is marked AlwaysBuild so
+# SCons never considers it up to date when 'check' is requested.
+check = env.Alias('check')
+env.AlwaysBuild(check)
+
+
#######################################################################
# Invoke host SConscripts
#
build_script:
- scons -j%NUMBER_OF_PROCESSORS% MSVC_VERSION=12.0 llvm=1
+after_build:
+- scons -j%NUMBER_OF_PROCESSORS% MSVC_VERSION=12.0 llvm=1 check
+
# It's possible to setup notification here, as described in
# http://www.appveyor.com/docs/notifications#appveyor-yml-configuration , but
src/gallium/drivers/softpipe/Makefile
src/gallium/drivers/svga/Makefile
src/gallium/drivers/swr/Makefile
- src/gallium/drivers/swr/avx/Makefile
- src/gallium/drivers/swr/avx2/Makefile
src/gallium/drivers/trace/Makefile
src/gallium/drivers/vc4/Makefile
src/gallium/drivers/virgl/Makefile
+++ /dev/null
-
-default: full
-
-all: full subset
-
-%.tag: %.doxy
- doxygen $<
-
-FULL = \
- main.doxy \
- math.doxy \
- vbo.doxy \
- glapi.doxy \
- glsl.doxy \
- swrast.doxy \
- swrast_setup.doxy \
- tnl.doxy \
- tnl_dd.doxy \
- gbm.doxy \
- i965.doxy
-
-full: $(FULL:.doxy=.tag)
- $(foreach FILE,$(FULL),doxygen $(FILE);)
-
-SUBSET = \
- main.doxy \
- math.doxy
-
-subset: $(SUBSET:.doxy=.tag)
- $(foreach FILE,$(SUBSET),doxygen $(FILE);)
-
-clean:
- -rm -rf $(FULL:.doxy=) $(SUBSET:.doxy=)
- -rm -rf *.tag
- -rm -rf *.db
+++ /dev/null
-<html>
-<head>
-<title>Mesa Source Code Documentation</title>
-<link href="doxygen.css" rel="stylesheet" type="text/css">
-</head>
-<body>
-<div class="qindex">
-<a class="qindex" href="../main/index.html">core</a> |
-<a class="qindex" href="../glapi/index.html">glapi</a> |
-<a class="qindex" href="../glsl/index.html">glsl</a> |
-<a class="qindex" href="../vbo/index.html">vbo</a> |
-<a class="qindex" href="../math/index.html">math</a> |
-<a class="qindex" href="../swrast/index.html">swrast</a> |
-<a class="qindex" href="../swrast_setup/index.html">swrast_setup</a> |
-<a class="qindex" href="../tnl/index.html">tnl</a> |
-<a class="qindex" href="../tnl_dd/index.html">tnl_dd</a> |
-<a class="qindex" href="../gbm/index.html">gbm</a> |
-<a class="qindex" href="../i965/index.html">i965</a>
-</div>
link_dir=$(top_builddir)/$(LIB_DIR)/egl; \
fi; \
$(MKDIR_P) $$link_dir; \
- file_list=$(dri_LTLIBRARIES:%.la=.libs/%.so); \
- file_list+=$(egl_LTLIBRARIES:%.la=.libs/%.$(LIB_EXT)*); \
- file_list+=$(lib_LTLIBRARIES:%.la=.libs/%.$(LIB_EXT)*); \
+ file_list="$(dri_LTLIBRARIES:%.la=.libs/%.so)"; \
+ file_list+="$(egl_LTLIBRARIES:%.la=.libs/%.$(LIB_EXT)*)"; \
+ file_list+="$(lib_LTLIBRARIES:%.la=.libs/%.$(LIB_EXT)*)"; \
for f in $$file_list; do \
if test -h .libs/$$f; then \
cp -d $$f $$link_dir; \
return targets
-def createInstallMethods(env):
- env.AddMethod(install_program, 'InstallProgram')
- env.AddMethod(install_shared_library, 'InstallSharedLibrary')
-
-
def msvc2013_compat(env):
if env['gcc']:
env.Append(CCFLAGS = [
'-Werror=pointer-arith',
])
-def createMSVCCompatMethods(env):
- env.AddMethod(msvc2013_compat, 'MSVC2013Compat')
+
+def unit_test(env, test_name, program_target, args=None):
+ env.InstallProgram(program_target)
+
+ cmd = [program_target[0].abspath]
+ if args is not None:
+ cmd += args
+ cmd = ' '.join(cmd)
+
+ # http://www.scons.org/wiki/UnitTests
+ action = SCons.Action.Action(cmd, " Running %s ..." % test_name)
+ alias = env.Alias(test_name, program_target, action)
+ env.AlwaysBuild(alias)
+ env.Depends('check', alias)
def num_jobs():
# Custom builders and methods
env.Tool('custom')
- createInstallMethods(env)
- createMSVCCompatMethods(env)
+ env.AddMethod(install_program, 'InstallProgram')
+ env.AddMethod(install_shared_library, 'InstallSharedLibrary')
+ env.AddMethod(msvc2013_compat, 'MSVC2013Compat')
+ env.AddMethod(unit_test, 'UnitTest')
env.PkgCheckModules('X11', ['x11', 'xext', 'xdamage', 'xfixes', 'glproto >= 1.4.13'])
env.PkgCheckModules('XCB', ['x11-xcb', 'xcb-glx >= 1.8.1', 'xcb-dri2 >= 1.8'])
Export('compiler')
SConscript('SConscript.glsl')
+SConscript('SConscript.nir')
--- /dev/null
+import common
+
+Import('*')
+
+from sys import executable as python_cmd
+
+env = env.Clone()
+
+env.MSVC2013Compat()
+
+env.Prepend(CPPPATH = [
+ '#include',
+ '#src',
+ '#src/mapi',
+ '#src/mesa',
+ '#src/gallium/include',
+ '#src/gallium/auxiliary',
+ '#src/compiler/nir',
+])
+
+# Make generated headers reachable from the include path.
+env.Prepend(CPPPATH = [Dir('.').abspath, Dir('nir').abspath])
+
+# nir generated sources
+
+nir_builder_opcodes_h = env.CodeGenerate(
+ target = 'nir/nir_builder_opcodes.h',
+ script = 'nir/nir_builder_opcodes_h.py',
+ source = [],
+ command = python_cmd + ' $SCRIPT > $TARGET'
+)
+
+env.CodeGenerate(
+ target = 'nir/nir_constant_expressions.c',
+ script = 'nir/nir_constant_expressions.py',
+ source = [],
+ command = python_cmd + ' $SCRIPT > $TARGET'
+)
+
+env.CodeGenerate(
+ target = 'nir/nir_opcodes.h',
+ script = 'nir/nir_opcodes_h.py',
+ source = [],
+ command = python_cmd + ' $SCRIPT > $TARGET'
+)
+
+env.CodeGenerate(
+ target = 'nir/nir_opcodes.c',
+ script = 'nir/nir_opcodes_c.py',
+ source = [],
+ command = python_cmd + ' $SCRIPT > $TARGET'
+)
+
+env.CodeGenerate(
+ target = 'nir/nir_opt_algebraic.c',
+ script = 'nir/nir_opt_algebraic.py',
+ source = [],
+ command = python_cmd + ' $SCRIPT > $TARGET'
+)
+
+# parse Makefile.sources
+source_lists = env.ParseSourceList('Makefile.sources')
+
+nir_sources = source_lists['NIR_FILES']
+nir_sources += source_lists['NIR_GENERATED_FILES']
+
+nir = env.ConvenienceLibrary(
+ target = 'nir',
+ source = nir_sources,
+)
+
+env.Alias('nir', nir)
+Export('nir')
bool is_ssa;
} nir_src;
-#define NIR_SRC_INIT (nir_src) { { NULL } }
+/**
+ * Build a nir_src whose first (innermost) member is NULL; every remaining
+ * member is implicitly zero-initialized by the aggregate initializer.
+ *
+ * NOTE(review): a function is used here rather than a compound-literal
+ * macro, presumably for compilers lacking compound literals -- confirm.
+ */
+static inline nir_src
+nir_src_init(void)
+{
+   nir_src blank_src = { { NULL } };
+   return blank_src;
+}
+
+/** Expands to a call yielding a freshly initialized nir_src. */
+#define NIR_SRC_INIT nir_src_init()
#define nir_foreach_use(reg_or_ssa_def, src) \
list_for_each_entry(nir_src, src, &(reg_or_ssa_def)->uses, use_link)
bool is_ssa;
} nir_dest;
-#define NIR_DEST_INIT (nir_dest) { { { NULL } } }
+/**
+ * Build a nir_dest whose first (innermost) member is NULL; every remaining
+ * member is implicitly zero-initialized by the aggregate initializer.
+ *
+ * NOTE(review): a function is used here rather than a compound-literal
+ * macro, presumably for compilers lacking compound literals -- confirm.
+ */
+static inline nir_dest
+nir_dest_init(void)
+{
+   nir_dest blank_dest = { { { NULL } } };
+   return blank_dest;
+}
+
+/** Expands to a call yielding a freshly initialized nir_dest. */
+#define NIR_DEST_INIT nir_dest_init()
#define nir_foreach_def(reg, dest) \
list_for_each_entry(nir_dest, dest, &(reg)->defs, reg.def_link)
NIR_INTRINSIC_UCP_ID = 4,
/**
- * The ammount of data, starting from BASE, that this instruction may
+ * The amount of data, starting from BASE, that this instruction may
* access. This is used to provide bounds if the offset is not constant.
*/
NIR_INTRINSIC_RANGE = 5,
#define ARR(...) { __VA_ARGS__ }
-INTRINSIC(load_var, 0, ARR(), true, 0, 1, 0, xx, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE)
+INTRINSIC(load_var, 0, ARR(0), true, 0, 1, 0, xx, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE)
INTRINSIC(store_var, 1, ARR(0), false, 0, 1, 1, WRMASK, xx, xx, 0)
-INTRINSIC(copy_var, 0, ARR(), false, 0, 2, 0, xx, xx, xx, 0)
+INTRINSIC(copy_var, 0, ARR(0), false, 0, 2, 0, xx, xx, xx, 0)
/*
* Interpolation of input. The interp_var_at* intrinsics are similar to the
* a barrier is an intrinsic with no inputs/outputs but which can't be moved
* around/optimized in general
*/
-#define BARRIER(name) INTRINSIC(name, 0, ARR(), false, 0, 0, 0, xx, xx, xx, 0)
+#define BARRIER(name) INTRINSIC(name, 0, ARR(0), false, 0, 0, 0, xx, xx, xx, 0)
BARRIER(barrier)
BARRIER(discard)
* The latter can be used as code motion barrier, which is currently not
* feasible with NIR.
*/
-INTRINSIC(shader_clock, 0, ARR(), true, 1, 0, 0, xx, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE)
+INTRINSIC(shader_clock, 0, ARR(0), true, 1, 0, 0, xx, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE)
/*
* Memory barrier with semantics analogous to the compute shader
*
* end_primitive implements GLSL's EndPrimitive() built-in.
*/
-INTRINSIC(emit_vertex, 0, ARR(), false, 0, 0, 1, STREAM_ID, xx, xx, 0)
-INTRINSIC(end_primitive, 0, ARR(), false, 0, 0, 1, STREAM_ID, xx, xx, 0)
+INTRINSIC(emit_vertex, 0, ARR(0), false, 0, 0, 1, STREAM_ID, xx, xx, 0)
+INTRINSIC(end_primitive, 0, ARR(0), false, 0, 0, 1, STREAM_ID, xx, xx, 0)
/**
* Geometry Shader intrinsics with a vertex count.
*/
#define ATOMIC(name, flags) \
- INTRINSIC(atomic_counter_##name##_var, 0, ARR(), true, 1, 1, 0, xx, xx, xx, flags) \
+ INTRINSIC(atomic_counter_##name##_var, 0, ARR(0), true, 1, 1, 0, xx, xx, xx, flags) \
INTRINSIC(atomic_counter_##name, 1, ARR(1), true, 1, 0, 1, BASE, xx, xx, flags)
ATOMIC(inc, 0)
INTRINSIC(image_atomic_xor, 3, ARR(4, 1, 1), true, 1, 1, 0, xx, xx, xx, 0)
INTRINSIC(image_atomic_exchange, 3, ARR(4, 1, 1), true, 1, 1, 0, xx, xx, xx, 0)
INTRINSIC(image_atomic_comp_swap, 4, ARR(4, 1, 1, 1), true, 1, 1, 0, xx, xx, xx, 0)
-INTRINSIC(image_size, 0, ARR(), true, 4, 1, 0, xx, xx, xx,
+INTRINSIC(image_size, 0, ARR(0), true, 4, 1, 0, xx, xx, xx,
NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
-INTRINSIC(image_samples, 0, ARR(), true, 1, 1, 0, xx, xx, xx,
+INTRINSIC(image_samples, 0, ARR(0), true, 1, 1, 0, xx, xx, xx,
NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
/*
INTRINSIC(shared_atomic_comp_swap, 3, ARR(1, 1, 1), true, 1, 0, 0, xx, xx, xx, 0)
#define SYSTEM_VALUE(name, components, num_indices, idx0, idx1, idx2) \
- INTRINSIC(load_##name, 0, ARR(), true, components, 0, num_indices, \
+ INTRINSIC(load_##name, 0, ARR(0), true, components, 0, num_indices, \
idx0, idx1, idx2, \
NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
* of the start of the variable being loaded and and the offset source is a
* offset into that variable.
*
- * Uniform load operations have a second index that specifies the size of the
- * variable being loaded. If const_index[1] == 0, then the size is unknown.
+ * Uniform load operations have a second "range" index that specifies the
+ * range (starting at base) of the data from which we are loading. If
+ * const_index[1] == 0, then the range is unknown.
*
* Some load operations such as UBO/SSBO load and per_vertex loads take an
* additional source to specify which UBO/SSBO/vertex to load from.
#define LOAD(name, srcs, num_indices, idx0, idx1, idx2, flags) \
INTRINSIC(load_##name, srcs, ARR(1, 1, 1, 1), true, 0, 0, num_indices, idx0, idx1, idx2, flags)
-/* src[] = { offset }. const_index[] = { base, range } */
-LOAD(uniform, 1, 2, BASE, RANGE, xx,
- NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+/* src[] = { offset }. const_index[] = { base, range } */
+LOAD(uniform, 1, 2, BASE, RANGE, xx, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
/* src[] = { buffer_index, offset }. No const_index */
LOAD(ubo, 2, 0, xx, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
/* src[] = { offset }. const_index[] = { base } */
intrin->variables[0]->var->data.driver_location);
if (load->intrinsic == nir_intrinsic_load_uniform) {
- load->const_index[1] =
- state->type_size(intrin->variables[0]->var->type);
+ nir_intrinsic_set_range(load,
+ state->type_size(intrin->variables[0]->var->type));
}
if (per_vertex)
#include <stdlib.h>
#include <inttypes.h> /* for PRIx64 macro */
+/* On Windows builds where snprintf is not already a macro, route snprintf
+ * calls to _snprintf.
+ * NOTE(review): _snprintf is documented not to NUL-terminate on truncation
+ * -- confirm the callers in this file size their buffers accordingly.
+ */
+#if defined(_WIN32) && !defined(snprintf)
+#define snprintf _snprintf
+#endif
+
static void
print_tabs(unsigned num_tabs, FILE *fp)
{
[NIR_INTRINSIC_STREAM_ID] = "stream-id",
[NIR_INTRINSIC_UCP_ID] = "ucp-id",
[NIR_INTRINSIC_RANGE] = "range",
- [NIR_INTRINSIC_DESC_SET] = "desc-set",
- [NIR_INTRINSIC_BINDING] = "binding",
};
for (unsigned idx = 1; idx < NIR_INTRINSIC_NUM_INDEX_FLAGS; idx++) {
if (!info->index_map[idx])
#include "nir.h"
#include <stdlib.h>
-#include <unistd.h>
/*
* Implements the classic to-SSA algorithm described by Cytron et. al. in
if HAVE_GALLIUM_SWR
SUBDIRS += drivers/swr
-SUBDIRS += drivers/swr/avx
-SUBDIRS += drivers/swr/avx2
endif
## vc4/rpi
target = testname,
source = [testname + '.c', 'lp_test_main.c'],
)
- env.InstallProgram(target)
-
- # http://www.scons.org/wiki/UnitTests
- alias = env.Alias(testname, [target], target[0].abspath)
- AlwaysBuild(alias)
+ env.UnitTest(testname, target)
Export('llvmpipe')
#define NVE4_COMPUTE_CLASS 0x0000a0c0
#define NVF0_COMPUTE_CLASS 0x0000a1c0
#define GM107_COMPUTE_CLASS 0x0000b0c0
+#define GM200_COMPUTE_CLASS 0x0000b1c0
#define NV84_CRYPT_CLASS 0x000074c1
#define BLOB_NVC0_PCOPY1_CLASS 0x000090b8
#define BLOB_NVC0_PCOPY0_CLASS 0x000090b5
case 0xf0:
case 0x100:
case 0x110:
+ case 0x120:
if (debug_get_bool_option("NVF0_COMPUTE", false))
return nve4_screen_compute_setup(screen, screen->base.pushbuf);
- case 0x120:
return 0;
default:
return -1;
case 0x110:
obj_class = GM107_COMPUTE_CLASS;
break;
+ case 0x120:
+ obj_class = GM200_COMPUTE_CLASS;
+ break;
default:
NOUVEAU_ERR("unsupported chipset: NV%02x\n", dev->chipset);
return -1;
0, 0, resource, level, box);
data = r600_buffer_map_sync_with_rings(rctx, staging, PIPE_TRANSFER_READ);
+ if (!data) {
+ pipe_resource_reference((struct pipe_resource **)&staging, NULL);
+ return NULL;
+ }
data += box->x % R600_MAP_BUFFER_ALIGNMENT;
return r600_buffer_get_transfer(ctx, resource, level, usage, box,
{
sctx->const_buffers[shader].desc.pointer_dirty = true;
sctx->rw_buffers[shader].desc.pointer_dirty = true;
+ sctx->shader_buffers[shader].desc.pointer_dirty = true;
sctx->samplers[shader].views.desc.pointer_dirty = true;
+ sctx->images[shader].desc.pointer_dirty = true;
if (shader == PIPE_SHADER_VERTEX)
sctx->vertex_buffers.pointer_dirty = true;
radeon_llvm_dispose(&ctx.radeon_bld);
+ /* Add the scratch offset to input SGPRs. */
+ if (shader->config.scratch_bytes_per_wave)
+ shader->info.num_input_sgprs += 1; /* scratch byte offset */
+
/* Calculate the number of fragment input VGPRs. */
if (ctx.type == TGSI_PROCESSOR_FRAGMENT) {
shader->info.num_input_vgprs = 0;
return true;
}
+/**
+ * Raise the shader's SGPR count, if needed, so that it covers all input
+ * SGPRs plus two additional registers (VCC).  A count that is already
+ * large enough is left unchanged.
+ */
+static void si_fix_num_sgprs(struct si_shader *shader)
+{
+	unsigned required_sgprs = shader->info.num_input_sgprs + 2; /* VCC */
+
+	if (shader->config.num_sgprs < required_sgprs)
+		shader->config.num_sgprs = required_sgprs;
+}
+
int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
struct si_shader *shader,
struct pipe_debug_callback *debug)
}
}
+ si_fix_num_sgprs(shader);
si_shader_dump(sscreen, shader, debug, shader->selector->info.processor,
stderr);
}
if (desc->layout == UTIL_FORMAT_LAYOUT_ETC &&
- sscreen->b.family >= CHIP_STONEY) {
+ sscreen->b.family == CHIP_STONEY) {
switch (format) {
case PIPE_FORMAT_ETC1_RGB8:
case PIPE_FORMAT_ETC2_RGB8:
static void si_shader_ls(struct si_shader *shader)
{
struct si_pm4_state *pm4;
- unsigned num_sgprs, num_user_sgprs;
unsigned vgpr_comp_cnt;
uint64_t va;
* VGPR0-3: (VertexID, RelAutoindex, ???, InstanceID). */
vgpr_comp_cnt = shader->info.uses_instanceid ? 3 : 1;
- num_user_sgprs = SI_LS_NUM_USER_SGPR;
- num_sgprs = shader->config.num_sgprs;
- if (num_user_sgprs > num_sgprs) {
- /* Last 2 reserved SGPRs are used for VCC */
- num_sgprs = num_user_sgprs + 2;
- }
- assert(num_sgprs <= 104);
-
si_pm4_set_reg(pm4, R_00B520_SPI_SHADER_PGM_LO_LS, va >> 8);
si_pm4_set_reg(pm4, R_00B524_SPI_SHADER_PGM_HI_LS, va >> 40);
shader->config.rsrc1 = S_00B528_VGPRS((shader->config.num_vgprs - 1) / 4) |
- S_00B528_SGPRS((num_sgprs - 1) / 8) |
+ S_00B528_SGPRS((shader->config.num_sgprs - 1) / 8) |
S_00B528_VGPR_COMP_CNT(vgpr_comp_cnt) |
S_00B528_DX10_CLAMP(1) |
S_00B528_FLOAT_MODE(shader->config.float_mode);
- shader->config.rsrc2 = S_00B52C_USER_SGPR(num_user_sgprs) |
+ shader->config.rsrc2 = S_00B52C_USER_SGPR(SI_LS_NUM_USER_SGPR) |
S_00B52C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0);
}
static void si_shader_hs(struct si_shader *shader)
{
struct si_pm4_state *pm4;
- unsigned num_sgprs, num_user_sgprs;
uint64_t va;
pm4 = shader->pm4 = CALLOC_STRUCT(si_pm4_state);
va = shader->bo->gpu_address;
si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_USER_SHADER);
- num_user_sgprs = SI_TCS_NUM_USER_SGPR;
- num_sgprs = shader->config.num_sgprs;
- /* One SGPR after user SGPRs is pre-loaded with tessellation factor
- * buffer offset. */
- if ((num_user_sgprs + 1) > num_sgprs) {
- /* Last 2 reserved SGPRs are used for VCC */
- num_sgprs = num_user_sgprs + 1 + 2;
- }
- assert(num_sgprs <= 104);
-
si_pm4_set_reg(pm4, R_00B420_SPI_SHADER_PGM_LO_HS, va >> 8);
si_pm4_set_reg(pm4, R_00B424_SPI_SHADER_PGM_HI_HS, va >> 40);
si_pm4_set_reg(pm4, R_00B428_SPI_SHADER_PGM_RSRC1_HS,
S_00B428_VGPRS((shader->config.num_vgprs - 1) / 4) |
- S_00B428_SGPRS((num_sgprs - 1) / 8) |
+ S_00B428_SGPRS((shader->config.num_sgprs - 1) / 8) |
S_00B428_DX10_CLAMP(1) |
S_00B428_FLOAT_MODE(shader->config.float_mode));
si_pm4_set_reg(pm4, R_00B42C_SPI_SHADER_PGM_RSRC2_HS,
- S_00B42C_USER_SGPR(num_user_sgprs) |
+ S_00B42C_USER_SGPR(SI_TCS_NUM_USER_SGPR) |
S_00B42C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0));
}
static void si_shader_es(struct si_shader *shader)
{
struct si_pm4_state *pm4;
- unsigned num_sgprs, num_user_sgprs;
+ unsigned num_user_sgprs;
unsigned vgpr_comp_cnt;
uint64_t va;
} else
unreachable("invalid shader selector type");
- num_sgprs = shader->config.num_sgprs;
- /* One SGPR after user SGPRs is pre-loaded with es2gs_offset */
- if ((num_user_sgprs + 1) > num_sgprs) {
- /* Last 2 reserved SGPRs are used for VCC */
- num_sgprs = num_user_sgprs + 1 + 2;
- }
- assert(num_sgprs <= 104);
-
si_pm4_set_reg(pm4, R_028AAC_VGT_ESGS_RING_ITEMSIZE,
shader->selector->esgs_itemsize / 4);
si_pm4_set_reg(pm4, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8);
si_pm4_set_reg(pm4, R_00B324_SPI_SHADER_PGM_HI_ES, va >> 40);
si_pm4_set_reg(pm4, R_00B328_SPI_SHADER_PGM_RSRC1_ES,
S_00B328_VGPRS((shader->config.num_vgprs - 1) / 4) |
- S_00B328_SGPRS((num_sgprs - 1) / 8) |
+ S_00B328_SGPRS((shader->config.num_sgprs - 1) / 8) |
S_00B328_VGPR_COMP_CNT(vgpr_comp_cnt) |
S_00B328_DX10_CLAMP(1) |
S_00B328_FLOAT_MODE(shader->config.float_mode));
unsigned gsvs_itemsize = shader->selector->max_gsvs_emit_size >> 2;
unsigned gs_num_invocations = shader->selector->gs_num_invocations;
struct si_pm4_state *pm4;
- unsigned num_sgprs, num_user_sgprs;
uint64_t va;
unsigned max_stream = shader->selector->max_gs_stream;
si_pm4_set_reg(pm4, R_00B220_SPI_SHADER_PGM_LO_GS, va >> 8);
si_pm4_set_reg(pm4, R_00B224_SPI_SHADER_PGM_HI_GS, va >> 40);
- num_user_sgprs = SI_GS_NUM_USER_SGPR;
- num_sgprs = shader->config.num_sgprs;
- /* Two SGPRs after user SGPRs are pre-loaded with gs2vs_offset, gs_wave_id */
- if ((num_user_sgprs + 2) > num_sgprs) {
- /* Last 2 reserved SGPRs are used for VCC */
- num_sgprs = num_user_sgprs + 2 + 2;
- }
- assert(num_sgprs <= 104);
-
si_pm4_set_reg(pm4, R_00B228_SPI_SHADER_PGM_RSRC1_GS,
S_00B228_VGPRS((shader->config.num_vgprs - 1) / 4) |
- S_00B228_SGPRS((num_sgprs - 1) / 8) |
+ S_00B228_SGPRS((shader->config.num_sgprs - 1) / 8) |
S_00B228_DX10_CLAMP(1) |
S_00B228_FLOAT_MODE(shader->config.float_mode));
si_pm4_set_reg(pm4, R_00B22C_SPI_SHADER_PGM_RSRC2_GS,
- S_00B22C_USER_SGPR(num_user_sgprs) |
+ S_00B22C_USER_SGPR(SI_GS_NUM_USER_SGPR) |
S_00B22C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0));
}
static void si_shader_vs(struct si_shader *shader, struct si_shader *gs)
{
struct si_pm4_state *pm4;
- unsigned num_sgprs, num_user_sgprs;
+ unsigned num_user_sgprs;
unsigned nparams, vgpr_comp_cnt;
uint64_t va;
unsigned window_space =
} else
unreachable("invalid shader selector type");
- num_sgprs = shader->config.num_sgprs;
- if (num_user_sgprs > num_sgprs) {
- /* Last 2 reserved SGPRs are used for VCC */
- num_sgprs = num_user_sgprs + 2;
- }
- assert(num_sgprs <= 104);
-
/* VS is required to export at least one param. */
nparams = MAX2(shader->info.nr_param_exports, 1);
si_pm4_set_reg(pm4, R_0286C4_SPI_VS_OUT_CONFIG,
si_pm4_set_reg(pm4, R_00B124_SPI_SHADER_PGM_HI_VS, va >> 40);
si_pm4_set_reg(pm4, R_00B128_SPI_SHADER_PGM_RSRC1_VS,
S_00B128_VGPRS((shader->config.num_vgprs - 1) / 4) |
- S_00B128_SGPRS((num_sgprs - 1) / 8) |
+ S_00B128_SGPRS((shader->config.num_sgprs - 1) / 8) |
S_00B128_VGPR_COMP_CNT(vgpr_comp_cnt) |
S_00B128_DX10_CLAMP(1) |
S_00B128_FLOAT_MODE(shader->config.float_mode));
struct tgsi_shader_info *info = &shader->selector->info;
struct si_pm4_state *pm4;
unsigned spi_ps_in_control, spi_shader_col_format, cb_shader_mask;
- unsigned num_sgprs, num_user_sgprs;
unsigned spi_baryc_cntl = S_0286E0_FRONT_FACE_ALL_BITS(1);
uint64_t va;
bool has_centroid;
si_pm4_set_reg(pm4, R_00B020_SPI_SHADER_PGM_LO_PS, va >> 8);
si_pm4_set_reg(pm4, R_00B024_SPI_SHADER_PGM_HI_PS, va >> 40);
- num_user_sgprs = SI_PS_NUM_USER_SGPR;
- num_sgprs = shader->config.num_sgprs;
- /* One SGPR after user SGPRs is pre-loaded with {prim_mask, lds_offset} */
- if ((num_user_sgprs + 1) > num_sgprs) {
- /* Last 2 reserved SGPRs are used for VCC */
- num_sgprs = num_user_sgprs + 1 + 2;
- }
- assert(num_sgprs <= 104);
-
si_pm4_set_reg(pm4, R_00B028_SPI_SHADER_PGM_RSRC1_PS,
S_00B028_VGPRS((shader->config.num_vgprs - 1) / 4) |
- S_00B028_SGPRS((num_sgprs - 1) / 8) |
+ S_00B028_SGPRS((shader->config.num_sgprs - 1) / 8) |
S_00B028_DX10_CLAMP(1) |
S_00B028_FLOAT_MODE(shader->config.float_mode));
si_pm4_set_reg(pm4, R_00B02C_SPI_SHADER_PGM_RSRC2_PS,
S_00B02C_EXTRA_LDS_SIZE(shader->config.lds_size) |
- S_00B02C_USER_SGPR(num_user_sgprs) |
+ S_00B02C_USER_SGPR(SI_PS_NUM_USER_SGPR) |
S_00B32C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0));
/* Prefer RE_Z if the shader is complex enough. The requirement is either:
libmesaswr_la_SOURCES = $(LOADER_SOURCES)
-EXTRA_DIST = Makefile.sources-arch
+# Compiler flags shared by both per-architecture libraries (libswrAVX and
+# libswrAVX2); each library adds its own -march and KNOB_ARCH in front.
+COMMON_CXXFLAGS = \
+ $(GALLIUM_DRIVER_CFLAGS) \
+ $(LLVM_CFLAGS) \
+ -I$(builddir)/rasterizer/scripts \
+ -I$(builddir)/rasterizer/jitter \
+ -I$(srcdir)/rasterizer \
+ -I$(srcdir)/rasterizer/core \
+ -I$(srcdir)/rasterizer/jitter
+
+# Source list used by both libraries.  BUILT_SOURCES is defined further
+# down; that is fine because "=" assignments are expanded lazily.
+COMMON_SOURCES = \
+ $(CXX_SOURCES) \
+ $(COMMON_CXX_SOURCES) \
+ $(CORE_CXX_SOURCES) \
+ $(JITTER_CXX_SOURCES) \
+ $(MEMORY_CXX_SOURCES) \
+ $(BUILT_SOURCES)
+
+# Files produced by the Python generator rules in this Makefile.
+BUILT_SOURCES = \
+ rasterizer/scripts/gen_knobs.cpp \
+ rasterizer/scripts/gen_knobs.h \
+ rasterizer/jitter/state_llvm.h \
+ rasterizer/jitter/builder_gen.h \
+ rasterizer/jitter/builder_gen.cpp \
+ rasterizer/jitter/builder_x86.h \
+ rasterizer/jitter/builder_x86.cpp
+
+# Rules for the generated sources.  Every recipe first creates the
+# directory of its target ($(@D)), so generation does not depend on the
+# build tree already containing rasterizer/scripts or rasterizer/jitter
+# (e.g. in an out-of-tree build).
+#
+# NOTE(review): the first rule names two targets for one recipe; make treats
+# that as two independent rules, so a parallel build may run gen_knobs.py
+# twice -- confirm the script tolerates this.
+rasterizer/scripts/gen_knobs.cpp rasterizer/scripts/gen_knobs.h: rasterizer/scripts/gen_knobs.py rasterizer/scripts/knob_defs.py rasterizer/scripts/templates/knobs.template
+	$(MKDIR_P) $(@D)
+	$(PYTHON2) $(PYTHON_FLAGS) \
+		$(srcdir)/rasterizer/scripts/gen_knobs.py \
+		rasterizer/scripts
+
+rasterizer/jitter/state_llvm.h: rasterizer/jitter/scripts/gen_llvm_types.py rasterizer/core/state.h
+	$(MKDIR_P) $(@D)
+	$(PYTHON2) $(PYTHON_FLAGS) \
+		$(srcdir)/rasterizer/jitter/scripts/gen_llvm_types.py \
+		--input $(srcdir)/rasterizer/core/state.h \
+		--output rasterizer/jitter/state_llvm.h
+
+rasterizer/jitter/builder_gen.h: rasterizer/jitter/scripts/gen_llvm_ir_macros.py $(LLVM_INCLUDEDIR)/llvm/IR/IRBuilder.h
+	$(MKDIR_P) $(@D)
+	$(PYTHON2) $(PYTHON_FLAGS) \
+		$(srcdir)/rasterizer/jitter/scripts/gen_llvm_ir_macros.py \
+		--input $(LLVM_INCLUDEDIR)/llvm/IR/IRBuilder.h \
+		--output rasterizer/jitter/builder_gen.h \
+		--gen_h
+
+rasterizer/jitter/builder_gen.cpp: rasterizer/jitter/scripts/gen_llvm_ir_macros.py $(LLVM_INCLUDEDIR)/llvm/IR/IRBuilder.h
+	$(MKDIR_P) $(@D)
+	$(PYTHON2) $(PYTHON_FLAGS) \
+		$(srcdir)/rasterizer/jitter/scripts/gen_llvm_ir_macros.py \
+		--input $(LLVM_INCLUDEDIR)/llvm/IR/IRBuilder.h \
+		--output rasterizer/jitter/builder_gen.cpp \
+		--gen_cpp
+
+rasterizer/jitter/builder_x86.h: rasterizer/jitter/scripts/gen_llvm_ir_macros.py
+	$(MKDIR_P) $(@D)
+	$(PYTHON2) $(PYTHON_FLAGS) \
+		$(srcdir)/rasterizer/jitter/scripts/gen_llvm_ir_macros.py \
+		--output rasterizer/jitter/builder_x86.h \
+		--gen_x86_h
+
+rasterizer/jitter/builder_x86.cpp: rasterizer/jitter/scripts/gen_llvm_ir_macros.py
+	$(MKDIR_P) $(@D)
+	$(PYTHON2) $(PYTHON_FLAGS) \
+		$(srcdir)/rasterizer/jitter/scripts/gen_llvm_ir_macros.py \
+		--output rasterizer/jitter/builder_x86.cpp \
+		--gen_x86_cpp
+
+
+# Libraries linked into both architecture variants.
+COMMON_LIBADD = \
+ $(top_builddir)/src/gallium/auxiliary/libgallium.la \
+ $(top_builddir)/src/mesa/libmesagallium.la
+
+# Two libraries built from the same COMMON_SOURCES; they differ only in the
+# -march flag and the KNOB_ARCH define.
+lib_LTLIBRARIES = libswrAVX.la libswrAVX2.la
+
+# AVX variant.
+libswrAVX_la_CXXFLAGS = \
+ -march=core-avx-i \
+ -DKNOB_ARCH=KNOB_ARCH_AVX \
+ $(COMMON_CXXFLAGS)
+
+libswrAVX_la_SOURCES = \
+ $(COMMON_SOURCES)
+
+libswrAVX_la_LIBADD = \
+ $(COMMON_LIBADD)
+
+# AVX2 variant.
+libswrAVX2_la_CXXFLAGS = \
+ -march=core-avx2 \
+ -DKNOB_ARCH=KNOB_ARCH_AVX2 \
+ $(COMMON_CXXFLAGS)
+
+libswrAVX2_la_SOURCES = \
+ $(COMMON_SOURCES)
+
+libswrAVX2_la_LIBADD = \
+ $(COMMON_LIBADD)
+
+include $(top_srcdir)/install-gallium-links.mk
LOADER_SOURCES := \
swr_loader.cpp
+
+# Sources that live directly in this directory (headers are listed next to
+# the implementation files).
+CXX_SOURCES := \
+ swr_clear.cpp \
+ swr_context.cpp \
+ swr_context.h \
+ swr_context_llvm.h \
+ swr_draw.cpp \
+ swr_public.h \
+ swr_resource.h \
+ swr_screen.cpp \
+ swr_screen.h \
+ swr_state.cpp \
+ swr_state.h \
+ swr_tex_sample.cpp \
+ swr_tex_sample.h \
+ swr_scratch.h \
+ swr_scratch.cpp \
+ swr_shader.cpp \
+ swr_memory.h \
+ swr_fence.h \
+ swr_fence.cpp \
+ swr_query.h \
+ swr_query.cpp
+
+# Sources under rasterizer/common.  Each file is listed exactly once.
+COMMON_CXX_SOURCES := \
+	rasterizer/common/containers.hpp \
+	rasterizer/common/formats.cpp \
+	rasterizer/common/formats.h \
+	rasterizer/common/isa.hpp \
+	rasterizer/common/os.h \
+	rasterizer/common/rdtsc_buckets.cpp \
+	rasterizer/common/rdtsc_buckets.h \
+	rasterizer/common/rdtsc_buckets_shared.h \
+	rasterizer/common/simdintrin.h \
+	rasterizer/common/swr_assert.cpp \
+	rasterizer/common/swr_assert.h
+
+# Sources under rasterizer/core.
+CORE_CXX_SOURCES := \
+ rasterizer/core/api.cpp \
+ rasterizer/core/api.h \
+ rasterizer/core/arena.h \
+ rasterizer/core/backend.cpp \
+ rasterizer/core/backend.h \
+ rasterizer/core/blend.h \
+ rasterizer/core/clip.cpp \
+ rasterizer/core/clip.h \
+ rasterizer/core/context.h \
+ rasterizer/core/depthstencil.h \
+ rasterizer/core/fifo.hpp \
+ rasterizer/core/format_traits.h \
+ rasterizer/core/format_types.h \
+ rasterizer/core/frontend.cpp \
+ rasterizer/core/frontend.h \
+ rasterizer/core/knobs.h \
+ rasterizer/core/knobs_init.h \
+ rasterizer/core/multisample.cpp \
+ rasterizer/core/multisample.h \
+ rasterizer/core/pa_avx.cpp \
+ rasterizer/core/pa.h \
+ rasterizer/core/rasterizer.cpp \
+ rasterizer/core/rasterizer.h \
+ rasterizer/core/rdtsc_core.cpp \
+ rasterizer/core/rdtsc_core.h \
+ rasterizer/core/ringbuffer.h \
+ rasterizer/core/state.h \
+ rasterizer/core/threads.cpp \
+ rasterizer/core/threads.h \
+ rasterizer/core/tilemgr.cpp \
+ rasterizer/core/tilemgr.h \
+ rasterizer/core/utils.cpp \
+ rasterizer/core/utils.h
+
+# Hand-written sources under rasterizer/jitter (the generated builder_* and
+# state_llvm files are not listed here).
+JITTER_CXX_SOURCES := \
+ rasterizer/jitter/blend_jit.cpp \
+ rasterizer/jitter/blend_jit.h \
+ rasterizer/jitter/builder.cpp \
+ rasterizer/jitter/builder.h \
+ rasterizer/jitter/builder_misc.cpp \
+ rasterizer/jitter/builder_misc.h \
+ rasterizer/jitter/fetch_jit.cpp \
+ rasterizer/jitter/fetch_jit.h \
+ rasterizer/jitter/JitManager.cpp \
+ rasterizer/jitter/JitManager.h \
+ rasterizer/jitter/streamout_jit.cpp \
+ rasterizer/jitter/streamout_jit.h
+
+# Sources under rasterizer/memory.
+MEMORY_CXX_SOURCES := \
+ rasterizer/memory/ClearTile.cpp \
+ rasterizer/memory/LoadTile.cpp \
+ rasterizer/memory/StoreTile.cpp
+++ /dev/null
-# Copyright (C) 2015 Intel Corporation. All Rights Reserved.
-#
-# Permission is hereby granted, free of charge, to any person obtaining a
-# copy of this software and associated documentation files (the "Software"),
-# to deal in the Software without restriction, including without limitation
-# the rights to use, copy, modify, merge, publish, distribute, sublicense,
-# and/or sell copies of the Software, and to permit persons to whom the
-# Software is furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice (including the next
-# paragraph) shall be included in all copies or substantial portions of the
-# Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-# IN THE SOFTWARE.
-
-CXX_SOURCES := \
- swr_clear.cpp \
- swr_context.cpp \
- swr_context.h \
- swr_context_llvm.h \
- swr_draw.cpp \
- swr_public.h \
- swr_resource.h \
- swr_screen.cpp \
- swr_screen.h \
- swr_state.cpp \
- swr_state.h \
- swr_tex_sample.cpp \
- swr_tex_sample.h \
- swr_scratch.h \
- swr_scratch.cpp \
- swr_shader.cpp \
- swr_memory.h \
- swr_fence.h \
- swr_fence.cpp \
- swr_query.h \
- swr_query.cpp
-
-COMMON_CXX_SOURCES := \
- rasterizer/common/containers.hpp \
- rasterizer/common/formats.cpp \
- rasterizer/common/formats.h \
- rasterizer/common/isa.hpp \
- rasterizer/common/os.h \
- rasterizer/common/rdtsc_buckets.cpp \
- rasterizer/common/rdtsc_buckets.h \
- rasterizer/common/rdtsc_buckets_shared.h \
- rasterizer/common/rdtsc_buckets_shared.h \
- rasterizer/common/simdintrin.h \
- rasterizer/common/swr_assert.cpp \
- rasterizer/common/swr_assert.h
-
-CORE_CXX_SOURCES := \
- rasterizer/core/api.cpp \
- rasterizer/core/api.h \
- rasterizer/core/arena.h \
- rasterizer/core/backend.cpp \
- rasterizer/core/backend.h \
- rasterizer/core/blend.h \
- rasterizer/core/clip.cpp \
- rasterizer/core/clip.h \
- rasterizer/core/context.h \
- rasterizer/core/depthstencil.h \
- rasterizer/core/fifo.hpp \
- rasterizer/core/format_traits.h \
- rasterizer/core/format_types.h \
- rasterizer/core/frontend.cpp \
- rasterizer/core/frontend.h \
- rasterizer/core/knobs.h \
- rasterizer/core/knobs_init.h \
- rasterizer/core/multisample.cpp \
- rasterizer/core/multisample.h \
- rasterizer/core/pa_avx.cpp \
- rasterizer/core/pa.h \
- rasterizer/core/rasterizer.cpp \
- rasterizer/core/rasterizer.h \
- rasterizer/core/rdtsc_core.cpp \
- rasterizer/core/rdtsc_core.h \
- rasterizer/core/ringbuffer.h \
- rasterizer/core/state.h \
- rasterizer/core/threads.cpp \
- rasterizer/core/threads.h \
- rasterizer/core/tilemgr.cpp \
- rasterizer/core/tilemgr.h \
- rasterizer/core/utils.cpp \
- rasterizer/core/utils.h
-
-JITTER_CXX_SOURCES := \
- rasterizer/jitter/blend_jit.cpp \
- rasterizer/jitter/blend_jit.h \
- rasterizer/jitter/builder.cpp \
- rasterizer/jitter/builder.h \
- rasterizer/jitter/builder_misc.cpp \
- rasterizer/jitter/builder_misc.h \
- rasterizer/jitter/fetch_jit.cpp \
- rasterizer/jitter/fetch_jit.h \
- rasterizer/jitter/JitManager.cpp \
- rasterizer/jitter/JitManager.h \
- rasterizer/jitter/streamout_jit.cpp \
- rasterizer/jitter/streamout_jit.h
-
-MEMORY_CXX_SOURCES := \
- rasterizer/memory/ClearTile.cpp \
- rasterizer/memory/LoadTile.cpp \
- rasterizer/memory/StoreTile.cpp
+++ /dev/null
-# Copyright (C) 2015 Intel Corporation. All Rights Reserved.
-#
-# Permission is hereby granted, free of charge, to any person obtaining a
-# copy of this software and associated documentation files (the "Software"),
-# to deal in the Software without restriction, including without limitation
-# the rights to use, copy, modify, merge, publish, distribute, sublicense,
-# and/or sell copies of the Software, and to permit persons to whom the
-# Software is furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice (including the next
-# paragraph) shall be included in all copies or substantial portions of the
-# Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-# IN THE SOFTWARE.
-
-include ../Makefile.sources-arch
-include $(top_srcdir)/src/gallium/Automake.inc
-
-VPATH = $(srcdir) $(srcdir)/..
-
-AM_CXXFLAGS = \
- -march=core-avx-i \
- -DKNOB_ARCH=KNOB_ARCH_AVX \
- $(GALLIUM_DRIVER_CFLAGS) \
- $(LLVM_CFLAGS) \
- -I$(builddir)/rasterizer/scripts \
- -I$(builddir)/rasterizer/jitter \
- -I$(srcdir)/../rasterizer \
- -I$(srcdir)/../rasterizer/core \
- -I$(srcdir)/../rasterizer/jitter
-
-lib_LTLIBRARIES = libswrAVX.la
-
-BUILT_SOURCES = \
- rasterizer/scripts/gen_knobs.cpp \
- rasterizer/scripts/gen_knobs.h \
- rasterizer/jitter/state_llvm.h \
- rasterizer/jitter/builder_gen.h \
- rasterizer/jitter/builder_gen.cpp \
- rasterizer/jitter/builder_x86.h \
- rasterizer/jitter/builder_x86.cpp
-
-libswrAVX_la_SOURCES = \
- $(CXX_SOURCES) \
- $(COMMON_CXX_SOURCES) \
- $(CORE_CXX_SOURCES) \
- $(JITTER_CXX_SOURCES) \
- $(MEMORY_CXX_SOURCES) \
- $(BUILT_SOURCES)
-
-rasterizer/scripts/gen_knobs.cpp rasterizer/scripts/gen_knobs.h: rasterizer/scripts/gen_knobs.py rasterizer/scripts/knob_defs.py rasterizer/scripts/templates/knobs.template
- $(PYTHON2) $(PYTHON_FLAGS) \
- $(srcdir)/../rasterizer/scripts/gen_knobs.py \
- rasterizer/scripts
-
-rasterizer/jitter/state_llvm.h: rasterizer/jitter/scripts/gen_llvm_types.py rasterizer/core/state.h
- $(PYTHON2) $(PYTHON_FLAGS) \
- $(srcdir)/../rasterizer/jitter/scripts/gen_llvm_types.py \
- --input $(srcdir)/../rasterizer/core/state.h \
- --output rasterizer/jitter/state_llvm.h
-
-rasterizer/jitter/builder_gen.h: rasterizer/jitter/scripts/gen_llvm_ir_macros.py $(LLVM_INCLUDEDIR)/llvm/IR/IRBuilder.h
- $(PYTHON2) $(PYTHON_FLAGS) \
- $(srcdir)/../rasterizer/jitter/scripts/gen_llvm_ir_macros.py \
- --input $(LLVM_INCLUDEDIR)/llvm/IR/IRBuilder.h \
- --output rasterizer/jitter/builder_gen.h \
- --gen_h
-
-rasterizer/jitter/builder_gen.cpp: rasterizer/jitter/scripts/gen_llvm_ir_macros.py $(LLVM_INCLUDEDIR)/llvm/IR/IRBuilder.h
- $(PYTHON2) $(PYTHON_FLAGS) \
- $(srcdir)/../rasterizer/jitter/scripts/gen_llvm_ir_macros.py \
- --input $(LLVM_INCLUDEDIR)/llvm/IR/IRBuilder.h \
- --output rasterizer/jitter/builder_gen.cpp \
- --gen_cpp
-
-rasterizer/jitter/builder_x86.h: rasterizer/jitter/scripts/gen_llvm_ir_macros.py
- $(PYTHON2) $(PYTHON_FLAGS) \
- $(srcdir)/../rasterizer/jitter/scripts/gen_llvm_ir_macros.py \
- --output rasterizer/jitter/builder_x86.h \
- --gen_x86_h
-
-rasterizer/jitter/builder_x86.cpp: rasterizer/jitter/scripts/gen_llvm_ir_macros.py
- $(PYTHON2) $(PYTHON_FLAGS) \
- $(srcdir)/../rasterizer/jitter/scripts/gen_llvm_ir_macros.py \
- --output rasterizer/jitter/builder_x86.cpp \
- --gen_x86_cpp
-
-
-libswrAVX_la_LIBADD = \
- $(top_builddir)/src/gallium/auxiliary/libgallium.la \
- $(top_builddir)/src/mesa/libmesagallium.la
-
-include $(top_srcdir)/install-gallium-links.mk
+++ /dev/null
-# Copyright (C) 2015 Intel Corporation. All Rights Reserved.
-#
-# Permission is hereby granted, free of charge, to any person obtaining a
-# copy of this software and associated documentation files (the "Software"),
-# to deal in the Software without restriction, including without limitation
-# the rights to use, copy, modify, merge, publish, distribute, sublicense,
-# and/or sell copies of the Software, and to permit persons to whom the
-# Software is furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice (including the next
-# paragraph) shall be included in all copies or substantial portions of the
-# Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-# IN THE SOFTWARE.
-
-include ../Makefile.sources-arch
-include $(top_srcdir)/src/gallium/Automake.inc
-
-VPATH = $(srcdir) $(srcdir)/..
-
-AM_CXXFLAGS = \
- -march=core-avx2 \
- -DKNOB_ARCH=KNOB_ARCH_AVX2 \
- $(GALLIUM_DRIVER_CFLAGS) \
- $(LLVM_CFLAGS) \
- -I$(builddir)/rasterizer/scripts \
- -I$(builddir)/rasterizer/jitter \
- -I$(srcdir)/../rasterizer \
- -I$(srcdir)/../rasterizer/core \
- -I$(srcdir)/../rasterizer/jitter
-
-lib_LTLIBRARIES = libswrAVX2.la
-
-BUILT_SOURCES = \
- rasterizer/scripts/gen_knobs.cpp \
- rasterizer/scripts/gen_knobs.h \
- rasterizer/jitter/state_llvm.h \
- rasterizer/jitter/builder_gen.h \
- rasterizer/jitter/builder_gen.cpp \
- rasterizer/jitter/builder_x86.h \
- rasterizer/jitter/builder_x86.cpp
-
-libswrAVX2_la_SOURCES = \
- $(CXX_SOURCES) \
- $(COMMON_CXX_SOURCES) \
- $(CORE_CXX_SOURCES) \
- $(JITTER_CXX_SOURCES) \
- $(MEMORY_CXX_SOURCES) \
- $(BUILT_SOURCES)
-
-rasterizer/scripts/gen_knobs.cpp rasterizer/scripts/gen_knobs.h: rasterizer/scripts/gen_knobs.py rasterizer/scripts/knob_defs.py rasterizer/scripts/templates/knobs.template
- $(PYTHON2) $(PYTHON_FLAGS) \
- $(srcdir)/../rasterizer/scripts/gen_knobs.py \
- rasterizer/scripts
-
-rasterizer/jitter/state_llvm.h: rasterizer/jitter/scripts/gen_llvm_types.py rasterizer/core/state.h
- $(PYTHON2) $(PYTHON_FLAGS) \
- $(srcdir)/../rasterizer/jitter/scripts/gen_llvm_types.py \
- --input $(srcdir)/../rasterizer/core/state.h \
- --output rasterizer/jitter/state_llvm.h
-
-rasterizer/jitter/builder_gen.h: rasterizer/jitter/scripts/gen_llvm_ir_macros.py $(LLVM_INCLUDEDIR)/llvm/IR/IRBuilder.h
- $(PYTHON2) $(PYTHON_FLAGS) \
- $(srcdir)/../rasterizer/jitter/scripts/gen_llvm_ir_macros.py \
- --input $(LLVM_INCLUDEDIR)/llvm/IR/IRBuilder.h \
- --output rasterizer/jitter/builder_gen.h \
- --gen_h
-
-rasterizer/jitter/builder_gen.cpp: rasterizer/jitter/scripts/gen_llvm_ir_macros.py $(LLVM_INCLUDEDIR)/llvm/IR/IRBuilder.h
- $(PYTHON2) $(PYTHON_FLAGS) \
- $(srcdir)/../rasterizer/jitter/scripts/gen_llvm_ir_macros.py \
- --input $(LLVM_INCLUDEDIR)/llvm/IR/IRBuilder.h \
- --output rasterizer/jitter/builder_gen.cpp \
- --gen_cpp
-
-rasterizer/jitter/builder_x86.h: rasterizer/jitter/scripts/gen_llvm_ir_macros.py
- $(PYTHON2) $(PYTHON_FLAGS) \
- $(srcdir)/../rasterizer/jitter/scripts/gen_llvm_ir_macros.py \
- --output rasterizer/jitter/builder_x86.h \
- --gen_x86_h
-
-rasterizer/jitter/builder_x86.cpp: rasterizer/jitter/scripts/gen_llvm_ir_macros.py
- $(PYTHON2) $(PYTHON_FLAGS) \
- $(srcdir)/../rasterizer/jitter/scripts/gen_llvm_ir_macros.py \
- --output rasterizer/jitter/builder_x86.cpp \
- --gen_x86_cpp
-
-
-libswrAVX2_la_LIBADD = \
- $(top_builddir)/src/gallium/auxiliary/libgallium.la \
- $(top_builddir)/src/mesa/libmesagallium.la
-
-include $(top_srcdir)/install-gallium-links.mk
target = progname,
source = progname + '.c',
)
-
- env.Alias(progname, env.InstallProgram(prog))
-
- # http://www.scons.org/wiki/UnitTests
- test_alias = env.Alias('unit', [prog], prog[0].abspath)
- AlwaysBuild(test_alias)
-
+ if progname not in [
+ 'u_cache_test', # too long
+ 'translate_test', # unreliable
+ ]:
+ env.UnitTest(progname, prog)
#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
#include "os/os_thread.h"
#include "os/os_time.h"
+#include "util/u_atomic.h"
#define NUM_THREADS 10
+static int verbosity = 0;
+
static pipe_thread threads[NUM_THREADS];
static pipe_barrier barrier;
static int thread_ids[NUM_THREADS];
+static volatile int waiting = 0;
+static volatile int proceeded = 0;
+
+
+/* Print a message to stdout only when verbosity is greater than zero.
+ * The body is wrapped in do { } while (0) so the macro expands to exactly
+ * one statement and stays safe inside an unbraced if/else.
+ */
+#define LOG(fmt, ...) \
+   do { \
+      if (verbosity > 0) { \
+         fprintf(stdout, fmt, ##__VA_ARGS__); \
+      } \
+   } while (0)
+
+/* Terminate the process with EXIT_FAILURE, after reporting the file, line
+ * and failed expression on stderr, when _cond evaluates to false.
+ * The body is wrapped in do { } while (0) so the macro expands to exactly
+ * one statement and stays safe inside an unbraced if/else.
+ */
+#define CHECK(_cond) \
+   do { \
+      if (!(_cond)) { \
+         fprintf(stderr, "%s:%u: `%s` failed\n", __FILE__, __LINE__, #_cond); \
+         _exit(EXIT_FAILURE); \
+      } \
+   } while (0)
+
+/* Thread body for the barrier test.  Each thread sleeps for a time
+ * proportional to its id, waits on the shared barrier, and checks the
+ * waiting/proceeded counters on both sides of the barrier.
+ */
static PIPE_THREAD_ROUTINE(thread_function, thread_data)
{
int thread_id = *((int *) thread_data);
- printf("thread %d starting\n", thread_id);
- os_time_sleep(thread_id * 1000 * 1000);
- printf("thread %d before barrier\n", thread_id);
+ LOG("thread %d starting\n", thread_id);
+ os_time_sleep(thread_id * 100 * 1000);
+ LOG("thread %d before barrier\n", thread_id);
+
+ /* No thread may have passed the barrier while this one has yet to reach it. */
+ CHECK(p_atomic_read(&proceeded) == 0);
+ p_atomic_inc(&waiting);
+
pipe_barrier_wait(&barrier);
- printf("thread %d exiting\n", thread_id);
+
+ /* Past the barrier: every thread must already have registered as waiting. */
+ CHECK(p_atomic_read(&waiting) == NUM_THREADS);
+
+ p_atomic_inc(&proceeded);
+
+ LOG("thread %d exiting\n", thread_id);
return 0;
}
-int main()
+int main(int argc, char *argv[])
{
int i;
- printf("pipe_barrier_test starting\n");
+ for (i = 1; i < argc; ++i) {
+ const char *arg = argv[i];
+ if (strcmp(arg, "-v") == 0) {
+ ++verbosity;
+ } else {
+ fprintf(stderr, "error: unrecognized option `%s`\n", arg);
+ exit(EXIT_FAILURE);
+ }
+ }
+
+ // Disable buffering
+ setbuf(stdout, NULL);
+
+ LOG("pipe_barrier_test starting\n");
pipe_barrier_init(&barrier, NUM_THREADS);
pipe_thread_wait(threads[i]);
}
+ CHECK(p_atomic_read(&proceeded) == NUM_THREADS);
+
pipe_barrier_destroy(&barrier);
- printf("pipe_barrier_test exiting\n");
+ LOG("pipe_barrier_test exiting\n");
return 0;
}
util_cpu_detect();
- if(argc <= 1)
- {}
+ if (argc <= 1 ||
+ !strcmp(argv[1], "default") )
+ create_fn = translate_create;
else if (!strcmp(argv[1], "generic"))
create_fn = translate_generic_create;
else if (!strcmp(argv[1], "x86"))
if (!create_fn)
{
- printf("Usage: ./translate_test [generic|x86|nosse|sse|sse2|sse3|sse4.1]\n");
+ printf("Usage: ./translate_test [default|generic|x86|nosse|sse|sse2|sse3|sse4.1]\n");
return 2;
}
*/
const unsigned int max_push_components = 16 * 8;
- /* For vulkan we don't limit the max_chunk_size. We set it to 32 float =
- * 128 bytes, which is the maximum vulkan push constant size.
+ /* We push small arrays, but no bigger than 16 floats. This is big enough
+ * for a vec4 but hopefully not large enough to push out other stuff. We
+ * should probably use a better heuristic at some point.
*/
- const unsigned int max_chunk_size = 32;
+ const unsigned int max_chunk_size = 16;
unsigned int num_push_constants = 0;
unsigned int num_pull_constants = 0;
if (!contiguous[u]) {
unsigned chunk_size = u - chunk_start + 1;
- if (num_push_constants + chunk_size <= max_push_components &&
- chunk_size <= max_chunk_size) {
+ /* Decide whether we should push or pull this parameter. In the
+ * Vulkan driver, push constants are explicitly exposed via the API
+ * so we push everything. In GL, we only push small arrays.
+ */
+ if (stage_prog_data->pull_param == NULL ||
+ (num_push_constants + chunk_size <= max_push_components &&
+ chunk_size <= max_chunk_size)) {
+ assert(num_push_constants + chunk_size <= max_push_components);
for (unsigned j = chunk_start; j <= u; j++)
push_constant_loc[j] = num_push_constants++;
} else {
case SHADER_OPCODE_MOV_INDIRECT:
/* Prior to Broadwell, we only have 8 address subregisters */
- return devinfo->gen < 8 ? 8 : inst->exec_size;
+ return devinfo->gen < 8 ? 8 : MIN2(inst->exec_size, 16);
default:
return inst->exec_size;
/* The destination stride of an instruction (in bytes) must be greater
* than or equal to the size of the rest of the instruction. Since the
* address register is of type UW, we can't use a D-type instruction.
- * In order to get around this, re re-type to UW and use a stride.
+ * In order to get around this, we retype to UW and use a stride.
*/
indirect_byte_offset =
retype(spread(indirect_byte_offset, 2), BRW_REGISTER_TYPE_UW);
+ struct brw_reg ind_src;
if (devinfo->gen < 8) {
- /* Prior to broadwell, we have a restriction that the bottom 5 bits
- * of the base offset and the bottom 5 bits of the indirect must add
- * to less than 32. In other words, the hardware needs to be able to
- * add the bottom five bits of the two to get the subnumber and add
- * the next 7 bits of each to get the actual register number. Since
- * the indirect may cause us to cross a register boundary, this makes
- * it almost useless. We could try and do something clever where we
- * use a actual base offset if base_offset % 32 == 0 but that would
- * mean we were generating different code depending on the base
- * offset. Instead, for the sake of consistency, we'll just do the
- * add ourselves.
+ /* From the Haswell PRM section "Register Region Restrictions":
+ *
+ * "The lower bits of the AddressImmediate must not overflow to
+ * change the register address. The lower 5 bits of Address
+ * Immediate when added to lower 5 bits of address register gives
+ * the sub-register offset. The upper bits of Address Immediate
+ * when added to upper bits of address register gives the register
+ * address. Any overflow from sub-register offset is dropped."
+ *
+ * This restriction is only listed in the Haswell PRM but empirical
+ * testing indicates that it applies on all older generations and is
+ * lifted on Broadwell.
+ *
+ * Since the indirect may cause us to cross a register boundary, this
+ * makes the base offset almost useless. We could try and do
+ * something clever where we use an actual base offset if
+ * base_offset % 32 == 0 but that would mean we were generating
+ * different code depending on the base offset. Instead, for the
+ * sake of consistency, we'll just do the add ourselves.
*/
brw_ADD(p, addr, indirect_byte_offset, brw_imm_uw(imm_byte_offset));
- brw_MOV(p, dst, retype(brw_VxH_indirect(0, 0), dst.type));
+ ind_src = brw_VxH_indirect(0, 0);
} else {
brw_MOV(p, addr, indirect_byte_offset);
- brw_MOV(p, dst, retype(brw_VxH_indirect(0, imm_byte_offset), dst.type));
+ ind_src = brw_VxH_indirect(0, imm_byte_offset);
+ }
+
+ brw_inst *mov = brw_MOV(p, dst, retype(ind_src, dst.type));
+
+ if (devinfo->gen == 6 && dst.file == BRW_MESSAGE_REGISTER_FILE &&
+ !inst->get_next()->is_tail_sentinel() &&
+ ((fs_inst *)inst->get_next())->mlen > 0) {
+ /* From the Sandybridge PRM:
+ *
+ * "[Errata: DevSNB(SNB)] If MRF register is updated by any
+ * instruction that “indexed/indirect” source AND is followed by a
+ * send, the instruction requires a “Switch”. This is to avoid
+ * race condition where send may dispatch before MRF is updated."
+ */
+ brw_inst_set_thread_control(devinfo, mov, BRW_THREAD_SWITCH);
}
}
}
nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]);
if (const_offset == NULL) {
fs_reg base_offset = retype(get_nir_src(instr->src[1]),
- BRW_REGISTER_TYPE_D);
+ BRW_REGISTER_TYPE_UD);
for (int i = 0; i < instr->num_components; i++)
VARYING_PULL_CONSTANT_LOAD(bld, offset(dest, bld, i), surf_index,
void emit_pull_constant_load(bblock_t *block, vec4_instruction *inst,
dst_reg dst,
src_reg orig_src,
- int base_offset,
+ int base_offset,
src_reg indirect);
void emit_pull_constant_load_reg(dst_reg dst,
src_reg surf_index,
pull->mlen = 2;
pull->header_size = 1;
} else if (devinfo->gen >= 7) {
- dst_reg grf_offset = dst_reg(this, glsl_type::int_type);
+ dst_reg grf_offset = dst_reg(this, glsl_type::uint_type);
grf_offset.type = offset_reg.type;
void
vec4_visitor::emit_pull_constant_load(bblock_t *block, vec4_instruction *inst,
dst_reg temp, src_reg orig_src,
- int base_offset, src_reg indirect)
+ int base_offset, src_reg indirect)
{
int reg_offset = base_offset + orig_src.reg_offset;
const unsigned index = prog_data->base.binding_table.pull_constants_start;
src_reg offset;
if (indirect.file != BAD_FILE) {
- offset = src_reg(this, glsl_type::int_type);
+ offset = src_reg(this, glsl_type::uint_type);
emit_before(block, inst, ADD(dst_reg(offset), indirect,
- brw_imm_d(reg_offset * 16)));
+ brw_imm_ud(reg_offset * 16)));
} else if (devinfo->gen >= 8) {
/* Store the offset in a GRF so we can send-from-GRF. */
- offset = src_reg(this, glsl_type::int_type);
- emit_before(block, inst, MOV(dst_reg(offset), brw_imm_d(reg_offset * 16)));
+ offset = src_reg(this, glsl_type::uint_type);
+ emit_before(block, inst, MOV(dst_reg(offset), brw_imm_ud(reg_offset * 16)));
} else {
offset = brw_imm_d(reg_offset * 16);
}
void
vec4_visitor::move_uniform_array_access_to_pull_constants()
{
+ /* The vulkan driver doesn't support pull constants other than UBOs so
+ * everything has to be pushed regardless.
+ */
+ if (stage_prog_data->pull_param == NULL)
+ return;
+
int pull_constant_loc[this->uniforms];
memset(pull_constant_loc, -1, sizeof(pull_constant_loc));
NULL
};
-static bool
+static int
intel_get_param(__DRIscreen *psp, int param, int *value)
{
int ret;
gp.value = value;
ret = drmCommandWriteRead(psp->fd, DRM_I915_GETPARAM, &gp, sizeof(gp));
- if (ret) {
- if (ret != -EINVAL)
+ if (ret < 0 && ret != -EINVAL)
_mesa_warning(NULL, "drm_i915_getparam: %d", ret);
- return false;
- }
- return true;
+ return ret;
}
static bool
intel_get_boolean(__DRIscreen *psp, int param)
{
int value = 0;
- return intel_get_param(psp, param, &value) && value;
+ return (intel_get_param(psp, param, &value) == 0) && value;
}
static void
ret = intel_get_param(intelScreen->driScrnPriv, I915_PARAM_SUBSLICE_TOTAL,
&intelScreen->subslice_total);
- if (ret != -EINVAL)
+ if (ret < 0 && ret != -EINVAL)
goto err_out;
ret = intel_get_param(intelScreen->driScrnPriv,
I915_PARAM_EU_TOTAL, &intelScreen->eu_total);
- if (ret != -EINVAL)
+ if (ret < 0 && ret != -EINVAL)
goto err_out;
/* Without this information, we cannot get the right Braswell brandstrings,
err_out:
intelScreen->subslice_total = -1;
intelScreen->eu_total = -1;
- _mesa_warning(NULL, "Failed to query GPU properties.\n");
+ /* ret is a negative errno value here (callers jump to err_out only when
+  * ret < 0), so negate it before handing it to strerror().
+  */
+ _mesa_warning(NULL, "Failed to query GPU properties (%s).\n", strerror(-ret));
}
static bool
st_validate_state(st, ST_PIPELINE_RENDER);
sv = st_create_texture_sampler_view(pipe, stObj->pt);
+ if (!sv) {
+ _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCallLists(bitmap text)");
+ return;
+ }
setup_render_state(ctx, sv, color, true);
pipe_resource_reference(&vb.buffer, NULL);
+ pipe_sampler_view_reference(&sv, NULL);
+
/* We uploaded modified constants, need to invalidate them. */
st->dirty.mesa |= _NEW_PROGRAM_CONSTANTS;
}
Export('mesautil')
-# http://www.scons.org/wiki/UnitTests
u_atomic_test = env.Program(
target = 'u_atomic_test',
source = ['u_atomic_test.c'],
)
-alias = env.Alias("u_atomic_test", u_atomic_test, u_atomic_test[0].abspath)
-AlwaysBuild(alias)
+env.UnitTest("u_atomic_test", u_atomic_test)
roundeven_test = env.Program(
target = 'roundeven_test',
source = ['roundeven_test.c'],
)
-alias = env.Alias("roundeven_test", roundeven_test, roundeven_test[0].abspath)
-AlwaysBuild(alias)
+env.UnitTest("roundeven_test", roundeven_test)