SI_SGPR_VERTEX_BUFFERS_HI,
SI_SGPR_BASE_VERTEX,
SI_SGPR_START_INSTANCE,
+ SI_SGPR_DRAWID,
SI_ES_NUM_USER_SGPR,
/* hw VS only */
SI_LS_NUM_USER_SGPR,
/* both TCS and TES */
- SI_SGPR_TCS_OUT_OFFSETS = SI_NUM_RESOURCE_SGPRS,
- SI_SGPR_TCS_OUT_LAYOUT,
+ SI_SGPR_TCS_OFFCHIP_LAYOUT = SI_NUM_RESOURCE_SGPRS,
SI_TES_NUM_USER_SGPR,
/* TCS only */
- SI_SGPR_TCS_IN_LAYOUT = SI_TES_NUM_USER_SGPR,
+ SI_SGPR_TCS_OUT_OFFSETS = SI_TES_NUM_USER_SGPR,
+ SI_SGPR_TCS_OUT_LAYOUT,
+ SI_SGPR_TCS_IN_LAYOUT,
SI_TCS_NUM_USER_SGPR,
/* GS limits */
SI_GS_NUM_USER_SGPR = SI_NUM_RESOURCE_SGPRS,
- SI_GSCOPY_NUM_USER_SGPR = SI_SGPR_CONST_BUFFERS_HI + 1,
+ SI_GSCOPY_NUM_USER_SGPR = SI_SGPR_RW_BUFFERS_HI + 1,
/* PS only */
SI_SGPR_ALPHA_REF = SI_NUM_RESOURCE_SGPRS,
SI_PARAM_VERTEX_BUFFERS = SI_NUM_RESOURCE_PARAMS,
SI_PARAM_BASE_VERTEX,
SI_PARAM_START_INSTANCE,
- /* [0] = clamp vertex color */
+ SI_PARAM_DRAWID,
+ /* [0] = clamp vertex color, VS as VS only */
SI_PARAM_VS_STATE_BITS,
+ /* same value as TCS_IN_LAYOUT, VS as LS only */
+ SI_PARAM_LS_OUT_LAYOUT = SI_PARAM_DRAWID + 1,
/* the other VS parameters are assigned dynamically */
+ /* Layout of TCS outputs in the offchip buffer
+ * [0:8] = the number of patches per threadgroup.
+ * [9:15] = the number of output vertices per patch.
+ * [16:31] = the offset of per patch attributes in the buffer in bytes.
+ */
+ SI_PARAM_TCS_OFFCHIP_LAYOUT = SI_NUM_RESOURCE_PARAMS, /* for TCS & TES */
+
+ /* TCS only parameters. */
+
/* Offsets where TCS outputs and TCS patch outputs live in LDS:
* [0:15] = TCS output patch0 offset / 16, max = NUM_PATCHES * 32 * 32
* [16:31] = TCS output patch0 offset for per-patch / 16, max = NUM_PATCHES*32*32* + 32*32
*/
- SI_PARAM_TCS_OUT_OFFSETS = SI_NUM_RESOURCE_PARAMS, /* for TCS & TES */
+ SI_PARAM_TCS_OUT_OFFSETS,
/* Layout of TCS outputs / TES inputs:
* [0:12] = stride between output patches in dwords, num_outputs * num_vertices * 4, max = 32*32*4
* [13:20] = stride between output vertices in dwords = num_inputs * 4, max = 32*4
* [26:31] = gl_PatchVerticesIn, max = 32
*/
- SI_PARAM_TCS_OUT_LAYOUT, /* for TCS & TES */
+ SI_PARAM_TCS_OUT_LAYOUT,
/* Layout of LS outputs / TCS inputs
* [0:12] = stride between patches in dwords = num_inputs * num_vertices * 4, max = 32*32*4
* [13:20] = stride between vertices in dwords = num_inputs * 4, max = 32*4
*/
- SI_PARAM_TCS_IN_LAYOUT, /* TCS only */
- SI_PARAM_LS_OUT_LAYOUT, /* same value as TCS_IN_LAYOUT, LS only */
+ SI_PARAM_TCS_IN_LAYOUT,
- /* TCS only parameters. */
- SI_PARAM_TESS_FACTOR_OFFSET = SI_PARAM_TCS_IN_LAYOUT + 1,
+ SI_PARAM_TCS_OC_LDS,
+ SI_PARAM_TESS_FACTOR_OFFSET,
SI_PARAM_PATCH_ID,
SI_PARAM_REL_IDS,
* binaries for one TGSI program. This can be shared by multiple contexts.
*/
struct si_shader_selector {
+ struct si_screen *screen;
+ struct util_queue_fence ready;
+
+ /* Should only be used by si_init_shader_selector_async
+ * if thread_index == -1 (non-threaded). */
+ LLVMTargetMachineRef tm;
+ struct pipe_debug_callback debug;
+ bool is_debug_context;
+
pipe_mutex mutex;
struct si_shader *first_variant; /* immutable after the first variant */
struct si_shader *last_variant; /* mutable */
/* Common TCS bits between the shader key and the epilog key. */
struct si_tcs_epilog_bits {
unsigned prim_mode:3;
+ uint64_t inputs_to_copy;
};
/* Common PS bits between the shader key and the prolog key. */
struct si_ps_prolog_bits {
unsigned color_two_side:1;
- /* TODO: add a flatshade bit that skips interpolation for colors */
+ unsigned flatshade_colors:1;
unsigned poly_stipple:1;
- unsigned force_persample_interp:1;
- /* TODO:
- * - add force_center_interp if MSAA is disabled and centroid or
- * sample are present
- * - add force_center_interp_bc_optimize to force center interpolation
- * based on the bc_optimize SGPR bit if MSAA is enabled, centroid is
- * present and sample isn't present.
- */
+ unsigned force_persp_sample_interp:1;
+ unsigned force_linear_sample_interp:1;
+ unsigned force_persp_center_interp:1;
+ unsigned force_linear_center_interp:1;
+ unsigned bc_optimize_for_persp:1;
+ unsigned bc_optimize_for_linear:1;
};
/* Common PS bits between the shader key and the epilog key. */
unsigned colors_read:8; /* color input components read */
unsigned num_interp_inputs:5; /* BCOLOR is at this location */
unsigned face_vgpr_index:5;
+ unsigned wqm:1;
char color_attr_index[2];
char color_interp_vgpr_index[2]; /* -1 == constant */
} ps_prolog;
struct si_shader_config {
unsigned num_sgprs;
unsigned num_vgprs;
+ unsigned spilled_sgprs;
+ unsigned spilled_vgprs;
unsigned lds_size;
unsigned spi_ps_input_ena;
unsigned spi_ps_input_addr;
struct radeon_shader_binary binary;
struct si_shader_config config;
struct si_shader_info info;
+
+ /* Shader key + LLVM IR + disassembly + statistics.
+ * Generated for debug contexts only.
+ */
+ char *shader_log;
+ size_t shader_log_size;
};
struct si_shader_part {
int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
struct si_shader *shader,
struct pipe_debug_callback *debug);
-void si_dump_shader_key(unsigned shader, union si_shader_key *key, FILE *f);
int si_compile_llvm(struct si_screen *sscreen,
struct radeon_shader_binary *binary,
struct si_shader_config *conf,