#include "compiler/nir/nir_serialize.h"
#include "tgsi/tgsi_parse.h"
-#include "tgsi/tgsi_ureg.h"
#include "util/hash_table.h"
#include "util/crc32.h"
#include "util/u_async_debug.h"
* Return the IR binary in a buffer. For TGSI the first 4 bytes contain its
* size as integer.
*/
-static void *si_get_ir_binary(struct si_shader_selector *sel)
+void *si_get_ir_binary(struct si_shader_selector *sel)
{
struct blob blob;
unsigned ir_size;
*
* Returns false on failure, in which case the ir_binary should be freed.
*/
-static bool si_shader_cache_insert_shader(struct si_screen *sscreen,
- void *ir_binary,
- struct si_shader *shader,
- bool insert_into_disk_cache)
+bool si_shader_cache_insert_shader(struct si_screen *sscreen, void *ir_binary,
+ struct si_shader *shader,
+ bool insert_into_disk_cache)
{
void *hw_binary;
struct hash_entry *entry;
return true;
}
-static bool si_shader_cache_load_shader(struct si_screen *sscreen,
- void *ir_binary,
- struct si_shader *shader)
+bool si_shader_cache_load_shader(struct si_screen *sscreen, void *ir_binary,
+ struct si_shader *shader)
{
struct hash_entry *entry =
_mesa_hash_table_search(sscreen->shader_cache, ir_binary);
polaris_set_vgt_vertex_reuse(sscreen, shader->selector, shader, pm4);
}
+/**
+ * Translate a PIPE_PRIM_* primitive mode into the V_028A6C_OUTPRIM_TYPE_*
+ * value that gets written to VGT_GS_OUT_PRIM_TYPE.
+ *
+ * Points and patches map to POINTLIST, every line variant maps to
+ * LINESTRIP, and every triangle, quad and polygon variant maps to TRISTRIP.
+ *
+ * "mode" must be a valid index into the lookup table below; this is only
+ * checked by an assert, so callers are expected to pass a PIPE_PRIM_* value
+ * that is listed in the table.
+ */
+static unsigned si_conv_prim_to_gs_out(unsigned mode)
+{
+ static const int prim_conv[] = {
+ [PIPE_PRIM_POINTS] = V_028A6C_OUTPRIM_TYPE_POINTLIST,
+ [PIPE_PRIM_LINES] = V_028A6C_OUTPRIM_TYPE_LINESTRIP,
+ [PIPE_PRIM_LINE_LOOP] = V_028A6C_OUTPRIM_TYPE_LINESTRIP,
+ [PIPE_PRIM_LINE_STRIP] = V_028A6C_OUTPRIM_TYPE_LINESTRIP,
+ [PIPE_PRIM_TRIANGLES] = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
+ [PIPE_PRIM_TRIANGLE_STRIP] = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
+ [PIPE_PRIM_TRIANGLE_FAN] = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
+ [PIPE_PRIM_QUADS] = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
+ [PIPE_PRIM_QUAD_STRIP] = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
+ [PIPE_PRIM_POLYGON] = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
+ [PIPE_PRIM_LINES_ADJACENCY] = V_028A6C_OUTPRIM_TYPE_LINESTRIP,
+ [PIPE_PRIM_LINE_STRIP_ADJACENCY] = V_028A6C_OUTPRIM_TYPE_LINESTRIP,
+ [PIPE_PRIM_TRIANGLES_ADJACENCY] = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
+ [PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY] = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
+ [PIPE_PRIM_PATCHES] = V_028A6C_OUTPRIM_TYPE_POINTLIST,
+ };
+ assert(mode < ARRAY_SIZE(prim_conv));
+
+ return prim_conv[mode];
+}
+
struct gfx9_gs_info {
unsigned es_verts_per_subgroup;
unsigned gs_prims_per_subgroup;
if (max_stream >= 2)
offset += num_components[2] * sel->gs_max_out_vertices;
si_pm4_set_reg(pm4, R_028A68_VGT_GSVS_RING_OFFSET_3, offset);
+ si_pm4_set_reg(pm4, R_028A6C_VGT_GS_OUT_PRIM_TYPE,
+ si_conv_prim_to_gs_out(sel->gs_output_prim));
if (max_stream >= 3)
offset += num_components[3] * sel->gs_max_out_vertices;
si_pm4_set_reg(pm4, R_028AB0_VGT_GSVS_RING_ITEMSIZE, offset);
key->part.ps.epilog.color_is_int10 &= sel->info.colors_written;
}
- if (rs) {
- bool is_poly = (sctx->current_rast_prim >= PIPE_PRIM_TRIANGLES &&
- sctx->current_rast_prim <= PIPE_PRIM_POLYGON) ||
- sctx->current_rast_prim >= PIPE_PRIM_TRIANGLES_ADJACENCY;
- bool is_line = !is_poly && sctx->current_rast_prim != PIPE_PRIM_POINTS;
+ bool is_poly = !util_prim_is_points_or_lines(sctx->current_rast_prim);
+ bool is_line = util_prim_is_lines(sctx->current_rast_prim);
- key->part.ps.prolog.color_two_side = rs->two_side && sel->info.colors_read;
- key->part.ps.prolog.flatshade_colors = rs->flatshade && sel->info.colors_read;
+ key->part.ps.prolog.color_two_side = rs->two_side && sel->info.colors_read;
+ key->part.ps.prolog.flatshade_colors = rs->flatshade && sel->info.colors_read;
- if (sctx->queued.named.blend) {
- key->part.ps.epilog.alpha_to_one = sctx->queued.named.blend->alpha_to_one &&
- rs->multisample_enable;
- }
+ if (sctx->queued.named.blend) {
+ key->part.ps.epilog.alpha_to_one = sctx->queued.named.blend->alpha_to_one &&
+ rs->multisample_enable;
+ }
- key->part.ps.prolog.poly_stipple = rs->poly_stipple_enable && is_poly;
- key->part.ps.epilog.poly_line_smoothing = ((is_poly && rs->poly_smooth) ||
- (is_line && rs->line_smooth)) &&
- sctx->framebuffer.nr_samples <= 1;
- key->part.ps.epilog.clamp_color = rs->clamp_fragment_color;
+ key->part.ps.prolog.poly_stipple = rs->poly_stipple_enable && is_poly;
+ key->part.ps.epilog.poly_line_smoothing = ((is_poly && rs->poly_smooth) ||
+ (is_line && rs->line_smooth)) &&
+ sctx->framebuffer.nr_samples <= 1;
+ key->part.ps.epilog.clamp_color = rs->clamp_fragment_color;
- if (sctx->ps_iter_samples > 1 &&
- sel->info.reads_samplemask) {
- key->part.ps.prolog.samplemask_log_ps_iter =
- util_logbase2(sctx->ps_iter_samples);
- }
+ if (sctx->ps_iter_samples > 1 &&
+ sel->info.reads_samplemask) {
+ key->part.ps.prolog.samplemask_log_ps_iter =
+ util_logbase2(sctx->ps_iter_samples);
+ }
- if (rs->force_persample_interp &&
- rs->multisample_enable &&
- sctx->framebuffer.nr_samples > 1 &&
- sctx->ps_iter_samples > 1) {
- key->part.ps.prolog.force_persp_sample_interp =
- sel->info.uses_persp_center ||
- sel->info.uses_persp_centroid;
-
- key->part.ps.prolog.force_linear_sample_interp =
- sel->info.uses_linear_center ||
- sel->info.uses_linear_centroid;
- } else if (rs->multisample_enable &&
- sctx->framebuffer.nr_samples > 1) {
- key->part.ps.prolog.bc_optimize_for_persp =
- sel->info.uses_persp_center &&
- sel->info.uses_persp_centroid;
- key->part.ps.prolog.bc_optimize_for_linear =
- sel->info.uses_linear_center &&
- sel->info.uses_linear_centroid;
- } else {
- /* Make sure SPI doesn't compute more than 1 pair
- * of (i,j), which is the optimization here. */
- key->part.ps.prolog.force_persp_center_interp =
- sel->info.uses_persp_center +
- sel->info.uses_persp_centroid +
- sel->info.uses_persp_sample > 1;
-
- key->part.ps.prolog.force_linear_center_interp =
- sel->info.uses_linear_center +
- sel->info.uses_linear_centroid +
- sel->info.uses_linear_sample > 1;
-
- if (sel->info.opcode_count[TGSI_OPCODE_INTERP_SAMPLE])
- key->mono.u.ps.interpolate_at_sample_force_center = 1;
- }
+ if (rs->force_persample_interp &&
+ rs->multisample_enable &&
+ sctx->framebuffer.nr_samples > 1 &&
+ sctx->ps_iter_samples > 1) {
+ key->part.ps.prolog.force_persp_sample_interp =
+ sel->info.uses_persp_center ||
+ sel->info.uses_persp_centroid;
+
+ key->part.ps.prolog.force_linear_sample_interp =
+ sel->info.uses_linear_center ||
+ sel->info.uses_linear_centroid;
+ } else if (rs->multisample_enable &&
+ sctx->framebuffer.nr_samples > 1) {
+ key->part.ps.prolog.bc_optimize_for_persp =
+ sel->info.uses_persp_center &&
+ sel->info.uses_persp_centroid;
+ key->part.ps.prolog.bc_optimize_for_linear =
+ sel->info.uses_linear_center &&
+ sel->info.uses_linear_centroid;
+ } else {
+ /* Make sure SPI doesn't compute more than 1 pair
+ * of (i,j), which is the optimization here. */
+ key->part.ps.prolog.force_persp_center_interp =
+ sel->info.uses_persp_center +
+ sel->info.uses_persp_centroid +
+ sel->info.uses_persp_sample > 1;
+
+ key->part.ps.prolog.force_linear_center_interp =
+ sel->info.uses_linear_center +
+ sel->info.uses_linear_centroid +
+ sel->info.uses_linear_sample > 1;
+
+ if (sel->info.opcode_count[TGSI_OPCODE_INTERP_SAMPLE])
+ key->mono.u.ps.interpolate_at_sample_force_center = 1;
}
key->part.ps.epilog.alpha_func = si_get_alpha_test_func(sctx);
{
struct si_shader_selector *sel = shader->selector;
struct si_screen *sscreen = sel->screen;
- struct si_compiler *compiler;
+ struct ac_llvm_compiler *compiler;
struct pipe_debug_callback *debug = &shader->compiler_ctx_state.debug;
int r;
main_part->selector = sel;
main_part->key.as_es = key->as_es;
main_part->key.as_ls = key->as_ls;
+ main_part->is_monolithic = false;
if (si_compile_tgsi_shader(sscreen, compiler_state->compiler,
- main_part, false,
- &compiler_state->debug) != 0) {
+ main_part, &compiler_state->debug) != 0) {
FREE(main_part);
return false;
}
{
struct si_shader_selector *sel = (struct si_shader_selector *)job;
struct si_screen *sscreen = sel->screen;
- struct si_compiler *compiler;
+ struct ac_llvm_compiler *compiler;
struct pipe_debug_callback *debug = &sel->compiler_ctx_state.debug;
assert(!debug->debug_message || debug->async);
util_queue_fence_init(&shader->ready);
shader->selector = sel;
+ shader->is_monolithic = false;
si_parse_next_shader_property(&sel->info,
sel->so.num_outputs != 0,
&shader->key);
mtx_unlock(&sscreen->shader_cache_mutex);
/* Compile the shader if it hasn't been loaded from the cache. */
- if (si_compile_tgsi_shader(sscreen, compiler, shader, false,
+ if (si_compile_tgsi_shader(sscreen, compiler, shader,
debug) != 0) {
FREE(shader);
FREE(ir_binary);
}
}
+/**
+ * Queue the initial compilation of a shader object on the screen's shader
+ * compiler queue.
+ *
+ * "ready_fence" is initialized here and handed to the queue together with
+ * "job" and "execute". "processor" is only used to decide whether shader
+ * dumping is enabled for this shader type.
+ *
+ * If the context has a debug callback that is not async-capable, if
+ * sctx->is_debug is set, or if shaders of this type can be dumped, the
+ * call blocks until the job has finished and the debug messages collected
+ * in the meantime are forwarded to sctx->debug. Otherwise it returns right
+ * after queueing the job.
+ */
+void si_schedule_initial_compile(struct si_context *sctx, unsigned processor,
+ struct util_queue_fence *ready_fence,
+ struct si_compiler_ctx_state *compiler_ctx_state,
+ void *job, util_queue_execute_func execute)
+{
+ util_queue_fence_init(ready_fence);
+
+ /* Only initialized and used when "wait" is true. */
+ struct util_async_debug_callback async_debug;
+ bool wait =
+ (sctx->debug.debug_message && !sctx->debug.async) ||
+ sctx->is_debug ||
+ si_can_dump_shader(sctx->screen, processor);
+
+ if (wait) {
+ /* Install the async debug callback before the job is queued. */
+ u_async_debug_init(&async_debug);
+ compiler_ctx_state->debug = async_debug.base;
+ }
+
+ util_queue_add_job(&sctx->screen->shader_compiler_queue, job,
+ ready_fence, execute, NULL);
+
+ if (wait) {
+ /* Wait for the job, then forward the collected messages to
+  * the context's debug callback and release the collector.
+  */
+ util_queue_fence_wait(ready_fence);
+ u_async_debug_drain(&async_debug, &sctx->debug);
+ u_async_debug_cleanup(&async_debug);
+ }
+}
+
/* Return descriptor slot usage masks from the given shader info. */
void si_get_active_slot_masks(const struct tgsi_shader_info *info,
uint32_t *const_and_shader_buffers,
}
}
sel->esgs_itemsize = util_last_bit64(sel->outputs_written) * 16;
+ sel->lshs_vertex_stride = sel->esgs_itemsize;
+
+ /* Add 1 dword to reduce LDS bank conflicts, so that each vertex
+ * will start on a different bank (except for the maximum 32*16).
+ */
+ if (sel->lshs_vertex_stride < 32*16)
+ sel->lshs_vertex_stride += 4;
/* For the ESGS ring in LDS, add 1 dword to reduce LDS bank
* conflicts, i.e. each vertex will start at a different bank.
}
(void) mtx_init(&sel->mutex, mtx_plain);
- util_queue_fence_init(&sel->ready);
-
- struct util_async_debug_callback async_debug;
- bool wait =
- (sctx->debug.debug_message && !sctx->debug.async) ||
- sctx->is_debug ||
- si_can_dump_shader(sscreen, sel->info.processor);
-
- if (wait) {
- u_async_debug_init(&async_debug);
- sel->compiler_ctx_state.debug = async_debug.base;
- }
-
- util_queue_add_job(&sscreen->shader_compiler_queue, sel,
- &sel->ready, si_init_shader_selector_async,
- NULL);
-
- if (wait) {
- util_queue_fence_wait(&sel->ready);
- u_async_debug_drain(&async_debug, &sctx->debug);
- u_async_debug_cleanup(&async_debug);
- }
+ si_schedule_initial_compile(sctx, sel->info.processor, &sel->ready,
+ &sel->compiler_ctx_state, sel,
+ si_init_shader_selector_async);
return sel;
}
static void si_emit_spi_map(struct si_context *sctx)
{
- struct radeon_winsys_cs *cs = sctx->gfx_cs;
struct si_shader *ps = sctx->ps_shader.current;
struct si_shader *vs = si_get_vs_state(sctx);
struct tgsi_shader_info *psinfo = ps ? &ps->selector->info : NULL;
unsigned i, num_interp, num_written = 0, bcol_interp[2];
+ unsigned spi_ps_input_cntl[32];
if (!ps || !ps->selector->info.num_inputs)
return;
num_interp = si_get_ps_num_interp(ps);
assert(num_interp > 0);
- radeon_set_context_reg_seq(cs, R_028644_SPI_PS_INPUT_CNTL_0, num_interp);
for (i = 0; i < psinfo->num_inputs; i++) {
unsigned name = psinfo->input_semantic_name[i];
unsigned index = psinfo->input_semantic_index[i];
unsigned interpolate = psinfo->input_interpolate[i];
- radeon_emit(cs, si_get_ps_input_cntl(sctx, vs, name, index,
- interpolate));
- num_written++;
+ spi_ps_input_cntl[num_written++] = si_get_ps_input_cntl(sctx, vs, name,
+ index, interpolate);
if (name == TGSI_SEMANTIC_COLOR) {
assert(index < ARRAY_SIZE(bcol_interp));
if (!(psinfo->colors_read & (0xf << (i * 4))))
continue;
- radeon_emit(cs, si_get_ps_input_cntl(sctx, vs, bcol,
- i, bcol_interp[i]));
- num_written++;
+ spi_ps_input_cntl[num_written++] =
+ si_get_ps_input_cntl(sctx, vs, bcol, i, bcol_interp[i]);
+
}
}
assert(num_interp == num_written);
+
+ /* R_028644_SPI_PS_INPUT_CNTL_0 */
+ /* Dota 2: Only ~16% of SPI map updates set different values. */
+ /* Talos: Only ~9% of SPI map updates set different values. */
+ radeon_opt_set_context_regn(sctx, R_028644_SPI_PS_INPUT_CNTL_0,
+ spi_ps_input_cntl,
+ sctx->tracked_regs.spi_ps_input_cntl, num_interp);
}
/**
si_flush_gfx_cs(sctx, RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW, NULL);
}
-/**
- * This is used when TCS is NULL in the VS->TCS->TES chain. In this case,
- * VS passes its outputs to TES directly, so the fixed-function shader only
- * has to write TESSOUTER and TESSINNER.
- */
-static void si_generate_fixed_func_tcs(struct si_context *sctx)
-{
- struct ureg_src outer, inner;
- struct ureg_dst tessouter, tessinner;
- struct ureg_program *ureg = ureg_create(PIPE_SHADER_TESS_CTRL);
-
- if (!ureg)
- return; /* if we get here, we're screwed */
-
- assert(!sctx->fixed_func_tcs_shader.cso);
-
- outer = ureg_DECL_system_value(ureg,
- TGSI_SEMANTIC_DEFAULT_TESSOUTER_SI, 0);
- inner = ureg_DECL_system_value(ureg,
- TGSI_SEMANTIC_DEFAULT_TESSINNER_SI, 0);
-
- tessouter = ureg_DECL_output(ureg, TGSI_SEMANTIC_TESSOUTER, 0);
- tessinner = ureg_DECL_output(ureg, TGSI_SEMANTIC_TESSINNER, 0);
-
- ureg_MOV(ureg, tessouter, outer);
- ureg_MOV(ureg, tessinner, inner);
- ureg_END(ureg);
-
- sctx->fixed_func_tcs_shader.cso =
- ureg_create_shader_and_destroy(ureg, &sctx->b);
-}
-
static void si_update_vgt_shader_config(struct si_context *sctx)
{
/* Calculate the index of the config.
si_pm4_bind_state(sctx, hs, sctx->tcs_shader.current->pm4);
} else {
if (!sctx->fixed_func_tcs_shader.cso) {
- si_generate_fixed_func_tcs(sctx);
+ sctx->fixed_func_tcs_shader.cso =
+ si_create_fixed_func_tcs(sctx);
if (!sctx->fixed_func_tcs_shader.cso)
return false;
}
static void si_emit_scratch_state(struct si_context *sctx)
{
- struct radeon_winsys_cs *cs = sctx->gfx_cs;
+ struct radeon_cmdbuf *cs = sctx->gfx_cs;
radeon_set_context_reg(cs, R_0286E8_SPI_TMPRING_SIZE,
sctx->spi_tmpring_size);
}
}
-void *si_get_blit_vs(struct si_context *sctx, enum blitter_attrib_type type,
- unsigned num_layers)
-{
- unsigned vs_blit_property;
- void **vs;
-
- switch (type) {
- case UTIL_BLITTER_ATTRIB_NONE:
- vs = num_layers > 1 ? &sctx->vs_blit_pos_layered :
- &sctx->vs_blit_pos;
- vs_blit_property = SI_VS_BLIT_SGPRS_POS;
- break;
- case UTIL_BLITTER_ATTRIB_COLOR:
- vs = num_layers > 1 ? &sctx->vs_blit_color_layered :
- &sctx->vs_blit_color;
- vs_blit_property = SI_VS_BLIT_SGPRS_POS_COLOR;
- break;
- case UTIL_BLITTER_ATTRIB_TEXCOORD_XY:
- case UTIL_BLITTER_ATTRIB_TEXCOORD_XYZW:
- assert(num_layers == 1);
- vs = &sctx->vs_blit_texcoord;
- vs_blit_property = SI_VS_BLIT_SGPRS_POS_TEXCOORD;
- break;
- default:
- assert(0);
- return NULL;
- }
- if (*vs)
- return *vs;
-
- struct ureg_program *ureg = ureg_create(PIPE_SHADER_VERTEX);
- if (!ureg)
- return NULL;
-
- /* Tell the shader to load VS inputs from SGPRs: */
- ureg_property(ureg, TGSI_PROPERTY_VS_BLIT_SGPRS, vs_blit_property);
- ureg_property(ureg, TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION, true);
-
- /* This is just a pass-through shader with 1-3 MOV instructions. */
- ureg_MOV(ureg,
- ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 0),
- ureg_DECL_vs_input(ureg, 0));
-
- if (type != UTIL_BLITTER_ATTRIB_NONE) {
- ureg_MOV(ureg,
- ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 0),
- ureg_DECL_vs_input(ureg, 1));
- }
-
- if (num_layers > 1) {
- struct ureg_src instance_id =
- ureg_DECL_system_value(ureg, TGSI_SEMANTIC_INSTANCEID, 0);
- struct ureg_dst layer =
- ureg_DECL_output(ureg, TGSI_SEMANTIC_LAYER, 0);
-
- ureg_MOV(ureg, ureg_writemask(layer, TGSI_WRITEMASK_X),
- ureg_scalar(instance_id, TGSI_SWIZZLE_X));
- }
- ureg_END(ureg);
-
- *vs = ureg_create_shader_and_destroy(ureg, &sctx->b);
- return *vs;
-}
-
void si_init_shader_functions(struct si_context *sctx)
{
sctx->atoms.s.spi_map.emit = si_emit_spi_map;