uint64_t va;
pm4 = shader->pm4 = CALLOC_STRUCT(si_pm4_state);
- if (pm4 == NULL)
+ if (!pm4)
return;
va = shader->bo->gpu_address;
vgpr_comp_cnt = shader->uses_instanceid ? 3 : 1;
num_user_sgprs = SI_LS_NUM_USER_SGPR;
- num_sgprs = shader->num_sgprs;
+ num_sgprs = shader->config.num_sgprs;
if (num_user_sgprs > num_sgprs) {
/* Last 2 reserved SGPRs are used for VCC */
num_sgprs = num_user_sgprs + 2;
si_pm4_set_reg(pm4, R_00B520_SPI_SHADER_PGM_LO_LS, va >> 8);
si_pm4_set_reg(pm4, R_00B524_SPI_SHADER_PGM_HI_LS, va >> 40);
- shader->ls_rsrc1 = S_00B528_VGPRS((shader->num_vgprs - 1) / 4) |
+ shader->config.rsrc1 = S_00B528_VGPRS((shader->config.num_vgprs - 1) / 4) |
S_00B528_SGPRS((num_sgprs - 1) / 8) |
S_00B528_VGPR_COMP_CNT(vgpr_comp_cnt) |
S_00B528_DX10_CLAMP(shader->dx10_clamp_mode);
- shader->ls_rsrc2 = S_00B52C_USER_SGPR(num_user_sgprs) |
- S_00B52C_SCRATCH_EN(shader->scratch_bytes_per_wave > 0);
+ shader->config.rsrc2 = S_00B52C_USER_SGPR(num_user_sgprs) |
+ S_00B52C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0);
}
static void si_shader_hs(struct si_shader *shader)
uint64_t va;
pm4 = shader->pm4 = CALLOC_STRUCT(si_pm4_state);
- if (pm4 == NULL)
+ if (!pm4)
return;
va = shader->bo->gpu_address;
si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_USER_SHADER);
num_user_sgprs = SI_TCS_NUM_USER_SGPR;
- num_sgprs = shader->num_sgprs;
+ num_sgprs = shader->config.num_sgprs;
/* One SGPR after user SGPRs is pre-loaded with tessellation factor
* buffer offset. */
if ((num_user_sgprs + 1) > num_sgprs) {
si_pm4_set_reg(pm4, R_00B420_SPI_SHADER_PGM_LO_HS, va >> 8);
si_pm4_set_reg(pm4, R_00B424_SPI_SHADER_PGM_HI_HS, va >> 40);
si_pm4_set_reg(pm4, R_00B428_SPI_SHADER_PGM_RSRC1_HS,
- S_00B428_VGPRS((shader->num_vgprs - 1) / 4) |
+ S_00B428_VGPRS((shader->config.num_vgprs - 1) / 4) |
S_00B428_SGPRS((num_sgprs - 1) / 8) |
S_00B428_DX10_CLAMP(shader->dx10_clamp_mode));
si_pm4_set_reg(pm4, R_00B42C_SPI_SHADER_PGM_RSRC2_HS,
S_00B42C_USER_SGPR(num_user_sgprs) |
- S_00B42C_SCRATCH_EN(shader->scratch_bytes_per_wave > 0));
+ S_00B42C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0));
}
static void si_shader_es(struct si_shader *shader)
pm4 = shader->pm4 = CALLOC_STRUCT(si_pm4_state);
- if (pm4 == NULL)
+ if (!pm4)
return;
va = shader->bo->gpu_address;
} else
unreachable("invalid shader selector type");
- num_sgprs = shader->num_sgprs;
+ num_sgprs = shader->config.num_sgprs;
/* One SGPR after user SGPRs is pre-loaded with es2gs_offset */
if ((num_user_sgprs + 1) > num_sgprs) {
/* Last 2 reserved SGPRs are used for VCC */
}
assert(num_sgprs <= 104);
+ si_pm4_set_reg(pm4, R_028AAC_VGT_ESGS_RING_ITEMSIZE,
+ shader->selector->esgs_itemsize / 4);
si_pm4_set_reg(pm4, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8);
si_pm4_set_reg(pm4, R_00B324_SPI_SHADER_PGM_HI_ES, va >> 40);
si_pm4_set_reg(pm4, R_00B328_SPI_SHADER_PGM_RSRC1_ES,
- S_00B328_VGPRS((shader->num_vgprs - 1) / 4) |
+ S_00B328_VGPRS((shader->config.num_vgprs - 1) / 4) |
S_00B328_SGPRS((num_sgprs - 1) / 8) |
S_00B328_VGPR_COMP_CNT(vgpr_comp_cnt) |
S_00B328_DX10_CLAMP(shader->dx10_clamp_mode));
si_pm4_set_reg(pm4, R_00B32C_SPI_SHADER_PGM_RSRC2_ES,
S_00B32C_USER_SGPR(num_user_sgprs) |
- S_00B32C_SCRATCH_EN(shader->scratch_bytes_per_wave > 0));
+ S_00B32C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0));
if (shader->selector->type == PIPE_SHADER_TESS_EVAL)
si_set_tesseval_regs(shader, pm4);
pm4 = shader->pm4 = CALLOC_STRUCT(si_pm4_state);
- if (pm4 == NULL)
+ if (!pm4)
return;
if (gs_max_vert_out <= 128) {
si_pm4_set_reg(pm4, R_028A64_VGT_GSVS_RING_OFFSET_2, gsvs_itemsize * ((max_stream >= 2) ? 2 : 1));
si_pm4_set_reg(pm4, R_028A68_VGT_GSVS_RING_OFFSET_3, gsvs_itemsize * ((max_stream >= 3) ? 3 : 1));
- si_pm4_set_reg(pm4, R_028AAC_VGT_ESGS_RING_ITEMSIZE,
- shader->selector->esgs_itemsize / 4);
si_pm4_set_reg(pm4, R_028AB0_VGT_GSVS_RING_ITEMSIZE, gsvs_itemsize * (max_stream + 1));
si_pm4_set_reg(pm4, R_028B38_VGT_GS_MAX_VERT_OUT, gs_max_vert_out);
si_pm4_set_reg(pm4, R_00B224_SPI_SHADER_PGM_HI_GS, va >> 40);
num_user_sgprs = SI_GS_NUM_USER_SGPR;
- num_sgprs = shader->num_sgprs;
+ num_sgprs = shader->config.num_sgprs;
/* Two SGPRs after user SGPRs are pre-loaded with gs2vs_offset, gs_wave_id */
if ((num_user_sgprs + 2) > num_sgprs) {
/* Last 2 reserved SGPRs are used for VCC */
assert(num_sgprs <= 104);
si_pm4_set_reg(pm4, R_00B228_SPI_SHADER_PGM_RSRC1_GS,
- S_00B228_VGPRS((shader->num_vgprs - 1) / 4) |
+ S_00B228_VGPRS((shader->config.num_vgprs - 1) / 4) |
S_00B228_SGPRS((num_sgprs - 1) / 8) |
S_00B228_DX10_CLAMP(shader->dx10_clamp_mode));
si_pm4_set_reg(pm4, R_00B22C_SPI_SHADER_PGM_RSRC2_GS,
S_00B22C_USER_SGPR(num_user_sgprs) |
- S_00B22C_SCRATCH_EN(shader->scratch_bytes_per_wave > 0));
+ S_00B22C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0));
}
static void si_shader_vs(struct si_shader *shader)
pm4 = shader->pm4 = CALLOC_STRUCT(si_pm4_state);
- if (pm4 == NULL)
+ if (!pm4)
return;
/* If this is the GS copy shader, the GS state writes this register.
} else
unreachable("invalid shader selector type");
- num_sgprs = shader->num_sgprs;
+ num_sgprs = shader->config.num_sgprs;
if (num_user_sgprs > num_sgprs) {
/* Last 2 reserved SGPRs are used for VCC */
num_sgprs = num_user_sgprs + 2;
si_pm4_set_reg(pm4, R_00B120_SPI_SHADER_PGM_LO_VS, va >> 8);
si_pm4_set_reg(pm4, R_00B124_SPI_SHADER_PGM_HI_VS, va >> 40);
si_pm4_set_reg(pm4, R_00B128_SPI_SHADER_PGM_RSRC1_VS,
- S_00B128_VGPRS((shader->num_vgprs - 1) / 4) |
+ S_00B128_VGPRS((shader->config.num_vgprs - 1) / 4) |
S_00B128_SGPRS((num_sgprs - 1) / 8) |
S_00B128_VGPR_COMP_CNT(vgpr_comp_cnt) |
S_00B128_DX10_CLAMP(shader->dx10_clamp_mode));
S_00B12C_SO_BASE2_EN(!!shader->selector->so.stride[2]) |
S_00B12C_SO_BASE3_EN(!!shader->selector->so.stride[3]) |
S_00B12C_SO_EN(!!shader->selector->so.num_outputs) |
- S_00B12C_SCRATCH_EN(shader->scratch_bytes_per_wave > 0));
+ S_00B12C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0));
if (window_space)
si_pm4_set_reg(pm4, R_028818_PA_CL_VTE_CNTL,
S_028818_VTX_XY_FMT(1) | S_028818_VTX_Z_FMT(1));
si_set_tesseval_regs(shader, pm4);
}
+static unsigned si_get_spi_shader_col_format(struct si_shader *shader)
+{
+ unsigned value = shader->key.ps.spi_shader_col_format;
+ unsigned i, num_targets = (util_last_bit(value) + 3) / 4;
+
+ /* If the i-th target format is set, all previous target formats must
+ * be non-zero to avoid hangs.
+ */
+ for (i = 0; i < num_targets; i++)
+ if (!(value & (0xf << (i * 4))))
+ value |= V_028714_SPI_SHADER_32_R << (i * 4);
+
+ return value;
+}
+
+static unsigned si_get_cb_shader_mask(unsigned spi_shader_col_format)
+{
+ unsigned i, cb_shader_mask = 0;
+
+ for (i = 0; i < 8; i++) {
+ switch ((spi_shader_col_format >> (i * 4)) & 0xf) {
+ case V_028714_SPI_SHADER_ZERO:
+ break;
+ case V_028714_SPI_SHADER_32_R:
+ cb_shader_mask |= 0x1 << (i * 4);
+ break;
+ case V_028714_SPI_SHADER_32_GR:
+ cb_shader_mask |= 0x3 << (i * 4);
+ break;
+ case V_028714_SPI_SHADER_32_AR:
+ cb_shader_mask |= 0x9 << (i * 4);
+ break;
+ case V_028714_SPI_SHADER_FP16_ABGR:
+ case V_028714_SPI_SHADER_UNORM16_ABGR:
+ case V_028714_SPI_SHADER_SNORM16_ABGR:
+ case V_028714_SPI_SHADER_UINT16_ABGR:
+ case V_028714_SPI_SHADER_SINT16_ABGR:
+ case V_028714_SPI_SHADER_32_ABGR:
+ cb_shader_mask |= 0xf << (i * 4);
+ break;
+ default:
+ assert(0);
+ }
+ }
+ return cb_shader_mask;
+}
+
static void si_shader_ps(struct si_shader *shader)
{
struct tgsi_shader_info *info = &shader->selector->info;
struct si_pm4_state *pm4;
- unsigned i, spi_ps_in_control;
+ unsigned spi_ps_in_control, spi_shader_col_format, cb_shader_mask;
unsigned num_sgprs, num_user_sgprs;
- unsigned spi_baryc_cntl = 0;
+ unsigned spi_baryc_cntl = S_0286E0_FRONT_FACE_ALL_BITS(1);
uint64_t va;
bool has_centroid;
pm4 = shader->pm4 = CALLOC_STRUCT(si_pm4_state);
- if (pm4 == NULL)
+ if (!pm4)
return;
- for (i = 0; i < info->num_inputs; i++) {
- switch (info->input_semantic_name[i]) {
- case TGSI_SEMANTIC_POSITION:
- /* SPI_BARYC_CNTL.POS_FLOAT_LOCATION
- * Possible vaules:
- * 0 -> Position = pixel center (default)
- * 1 -> Position = pixel centroid
- * 2 -> Position = at sample position
- */
- switch (info->input_interpolate_loc[i]) {
- case TGSI_INTERPOLATE_LOC_CENTROID:
- spi_baryc_cntl |= S_0286E0_POS_FLOAT_LOCATION(1);
- break;
- case TGSI_INTERPOLATE_LOC_SAMPLE:
- spi_baryc_cntl |= S_0286E0_POS_FLOAT_LOCATION(2);
- break;
- }
+ /* SPI_BARYC_CNTL.POS_FLOAT_LOCATION
+ * Possible vaules:
+ * 0 -> Position = pixel center
+ * 1 -> Position = pixel centroid
+ * 2 -> Position = at sample position
+ *
+ * From GLSL 4.5 specification, section 7.1:
+ * "The variable gl_FragCoord is available as an input variable from
+ * within fragment shaders and it holds the window relative coordinates
+ * (x, y, z, 1/w) values for the fragment. If multi-sampling, this
+ * value can be for any location within the pixel, or one of the
+ * fragment samples. The use of centroid does not further restrict
+ * this value to be inside the current primitive."
+ *
+ * Meaning that centroid has no effect and we can return anything within
+ * the pixel. Thus, return the value at sample position, because that's
+ * the most accurate one shaders can get.
+ */
+ spi_baryc_cntl |= S_0286E0_POS_FLOAT_LOCATION(2);
- if (info->properties[TGSI_PROPERTY_FS_COORD_PIXEL_CENTER] ==
- TGSI_FS_COORD_PIXEL_CENTER_INTEGER)
- spi_baryc_cntl |= S_0286E0_POS_FLOAT_ULC(1);
- break;
- }
- }
+ if (info->properties[TGSI_PROPERTY_FS_COORD_PIXEL_CENTER] ==
+ TGSI_FS_COORD_PIXEL_CENTER_INTEGER)
+ spi_baryc_cntl |= S_0286E0_POS_FLOAT_ULC(1);
- has_centroid = G_0286CC_PERSP_CENTROID_ENA(shader->spi_ps_input_ena) ||
- G_0286CC_LINEAR_CENTROID_ENA(shader->spi_ps_input_ena);
+ spi_shader_col_format = si_get_spi_shader_col_format(shader);
+ cb_shader_mask = si_get_cb_shader_mask(spi_shader_col_format);
+
+ /* This must be non-zero for alpha-test/kill to work.
+ * The hardware ignores the EXEC mask if no export memory is allocated.
+ * Don't add this to CB_SHADER_MASK.
+ */
+ if (!spi_shader_col_format &&
+ !info->writes_z && !info->writes_stencil && !info->writes_samplemask &&
+ (shader->selector->info.uses_kill ||
+ shader->key.ps.alpha_func != PIPE_FUNC_ALWAYS))
+ spi_shader_col_format = V_028714_SPI_SHADER_32_R;
+
+ /* Set interpolation controls. */
+ has_centroid = G_0286CC_PERSP_CENTROID_ENA(shader->config.spi_ps_input_ena) ||
+ G_0286CC_LINEAR_CENTROID_ENA(shader->config.spi_ps_input_ena);
spi_ps_in_control = S_0286D8_NUM_INTERP(shader->nparam) |
S_0286D8_BC_OPTIMIZE_DISABLE(has_centroid);
+ /* Set registers. */
si_pm4_set_reg(pm4, R_0286E0_SPI_BARYC_CNTL, spi_baryc_cntl);
si_pm4_set_reg(pm4, R_0286D8_SPI_PS_IN_CONTROL, spi_ps_in_control);
- si_pm4_set_reg(pm4, R_028710_SPI_SHADER_Z_FORMAT, shader->spi_shader_z_format);
- si_pm4_set_reg(pm4, R_028714_SPI_SHADER_COL_FORMAT,
- shader->spi_shader_col_format);
- si_pm4_set_reg(pm4, R_02823C_CB_SHADER_MASK, shader->cb_shader_mask);
+ si_pm4_set_reg(pm4, R_028710_SPI_SHADER_Z_FORMAT,
+ info->writes_samplemask ? V_028710_SPI_SHADER_32_ABGR :
+ info->writes_stencil ? V_028710_SPI_SHADER_32_GR :
+ info->writes_z ? V_028710_SPI_SHADER_32_R :
+ V_028710_SPI_SHADER_ZERO);
+
+ si_pm4_set_reg(pm4, R_028714_SPI_SHADER_COL_FORMAT, spi_shader_col_format);
+ si_pm4_set_reg(pm4, R_02823C_CB_SHADER_MASK, cb_shader_mask);
va = shader->bo->gpu_address;
si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_USER_SHADER);
si_pm4_set_reg(pm4, R_00B024_SPI_SHADER_PGM_HI_PS, va >> 40);
num_user_sgprs = SI_PS_NUM_USER_SGPR;
- num_sgprs = shader->num_sgprs;
+ num_sgprs = shader->config.num_sgprs;
/* One SGPR after user SGPRs is pre-loaded with {prim_mask, lds_offset} */
if ((num_user_sgprs + 1) > num_sgprs) {
/* Last 2 reserved SGPRs are used for VCC */
assert(num_sgprs <= 104);
si_pm4_set_reg(pm4, R_00B028_SPI_SHADER_PGM_RSRC1_PS,
- S_00B028_VGPRS((shader->num_vgprs - 1) / 4) |
+ S_00B028_VGPRS((shader->config.num_vgprs - 1) / 4) |
S_00B028_SGPRS((num_sgprs - 1) / 8) |
S_00B028_DX10_CLAMP(shader->dx10_clamp_mode));
si_pm4_set_reg(pm4, R_00B02C_SPI_SHADER_PGM_RSRC2_PS,
- S_00B02C_EXTRA_LDS_SIZE(shader->lds_size) |
+ S_00B02C_EXTRA_LDS_SIZE(shader->config.lds_size) |
S_00B02C_USER_SGPR(num_user_sgprs) |
- S_00B32C_SCRATCH_EN(shader->scratch_bytes_per_wave > 0));
+ S_00B32C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0));
}
static void si_shader_init_pm4_state(struct si_shader *shader)
}
}
+static unsigned si_get_alpha_test_func(struct si_context *sctx)
+{
+ /* Alpha-test should be disabled if colorbuffer 0 is integer. */
+ if (sctx->queued.named.dsa &&
+ !sctx->framebuffer.cb0_is_integer)
+ return sctx->queued.named.dsa->alpha_func;
+
+ return PIPE_FUNC_ALWAYS;
+}
+
/* Compute the key for the hw shader variant */
static inline void si_shader_selector_key(struct pipe_context *ctx,
struct si_shader_selector *sel,
if (sctx->tes_shader.cso)
key->vs.as_ls = 1;
- else if (sctx->gs_shader.cso) {
+ else if (sctx->gs_shader.cso)
key->vs.as_es = 1;
- key->vs.es_enabled_outputs = sctx->gs_shader.cso->inputs_read;
- }
if (!sctx->gs_shader.cso && sctx->ps_shader.cso &&
sctx->ps_shader.cso->info.uses_primid)
sctx->tes_shader.cso->info.properties[TGSI_PROPERTY_TES_PRIM_MODE];
break;
case PIPE_SHADER_TESS_EVAL:
- if (sctx->gs_shader.cso) {
+ if (sctx->gs_shader.cso)
key->tes.as_es = 1;
- key->tes.es_enabled_outputs = sctx->gs_shader.cso->inputs_read;
- } else if (sctx->ps_shader.cso && sctx->ps_shader.cso->info.uses_primid)
+ else if (sctx->ps_shader.cso && sctx->ps_shader.cso->info.uses_primid)
key->tes.export_prim_id = 1;
break;
case PIPE_SHADER_GEOMETRY:
break;
case PIPE_SHADER_FRAGMENT: {
struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
+ struct si_state_blend *blend = sctx->queued.named.blend;
- if (sel->info.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS])
+ if (sel->info.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS] &&
+ sel->info.colors_written == 0x1)
key->ps.last_cbuf = MAX2(sctx->framebuffer.state.nr_cbufs, 1) - 1;
- key->ps.export_16bpc = sctx->framebuffer.export_16bpc;
+
+ if (blend) {
+ /* Select the shader color format based on whether
+ * blending or alpha are needed.
+ */
+ key->ps.spi_shader_col_format =
+ (blend->blend_enable_4bit & blend->need_src_alpha_4bit &
+ sctx->framebuffer.spi_shader_col_format_blend_alpha) |
+ (blend->blend_enable_4bit & ~blend->need_src_alpha_4bit &
+ sctx->framebuffer.spi_shader_col_format_blend) |
+ (~blend->blend_enable_4bit & blend->need_src_alpha_4bit &
+ sctx->framebuffer.spi_shader_col_format_alpha) |
+ (~blend->blend_enable_4bit & ~blend->need_src_alpha_4bit &
+ sctx->framebuffer.spi_shader_col_format);
+ } else
+ key->ps.spi_shader_col_format = sctx->framebuffer.spi_shader_col_format;
+
+ /* If alpha-to-coverage is enabled, we have to export alpha
+ * even if there is no color buffer.
+ */
+ if (!(key->ps.spi_shader_col_format & 0xf) &&
+ blend && blend->alpha_to_coverage)
+ key->ps.spi_shader_col_format |= V_028710_SPI_SHADER_32_AR;
+
+ /* On SI and CIK except Hawaii, the CB doesn't clamp outputs
+ * to the range supported by the type if a channel has less
+ * than 16 bits and the export format is 16_ABGR.
+ */
+ if (sctx->b.chip_class <= CIK && sctx->b.family != CHIP_HAWAII)
+ key->ps.color_is_int8 = sctx->framebuffer.color_is_int8;
if (rs) {
bool is_poly = (sctx->current_rast_prim >= PIPE_PRIM_TRIANGLES &&
key->ps.clamp_color = rs->clamp_fragment_color;
}
- key->ps.alpha_func = PIPE_FUNC_ALWAYS;
- /* Alpha-test should be disabled if colorbuffer 0 is integer. */
- if (sctx->queued.named.dsa &&
- !sctx->framebuffer.cb0_is_integer)
- key->ps.alpha_func = sctx->queued.named.dsa->alpha_func;
+ key->ps.alpha_func = si_get_alpha_test_func(sctx);
break;
}
default:
shader->selector = sel;
shader->key = key;
- r = si_shader_create(sctx->screen, sctx->tm, shader);
+ r = si_shader_create(sctx->screen, sctx->tm, shader, &sctx->b.debug);
if (unlikely(r)) {
R600_ERR("Failed to build shader variant (type=%u) %d\n",
sel->type, r);
sel->last_variant = shader;
}
state->current = shader;
- p_atomic_inc(&sctx->screen->b.num_compilations);
pipe_mutex_unlock(sel->mutex);
return 0;
}
sel->gs_input_verts_per_prim =
u_vertices_per_prim(sel->info.properties[TGSI_PROPERTY_GS_INPUT_PRIM]);
-
- for (i = 0; i < sel->info.num_inputs; i++) {
- unsigned name = sel->info.input_semantic_name[i];
- unsigned index = sel->info.input_semantic_index[i];
-
- switch (name) {
- case TGSI_SEMANTIC_PRIMID:
- break;
- default:
- sel->inputs_read |=
- 1llu << si_shader_io_get_unique_index(name, index);
- }
- }
break;
case PIPE_SHADER_VERTEX:
}
sel->esgs_itemsize = util_last_bit64(sel->outputs_written) * 16;
break;
- case PIPE_SHADER_FRAGMENT:
- for (i = 0; i < sel->info.num_outputs; i++) {
- unsigned name = sel->info.output_semantic_name[i];
- unsigned index = sel->info.output_semantic_index[i];
+ }
- if (name == TGSI_SEMANTIC_COLOR)
- sel->ps_colors_written |= 1 << index;
- }
+ /* DB_SHADER_CONTROL */
+ sel->db_shader_control =
+ S_02880C_Z_EXPORT_ENABLE(sel->info.writes_z) |
+ S_02880C_STENCIL_TEST_VAL_EXPORT_ENABLE(sel->info.writes_stencil) |
+ S_02880C_MASK_EXPORT_ENABLE(sel->info.writes_samplemask) |
+ S_02880C_KILL_ENABLE(sel->info.uses_kill);
+
+ switch (sel->info.properties[TGSI_PROPERTY_FS_DEPTH_LAYOUT]) {
+ case TGSI_FS_DEPTH_LAYOUT_GREATER:
+ sel->db_shader_control |=
+ S_02880C_CONSERVATIVE_Z_EXPORT(V_02880C_EXPORT_GREATER_THAN_Z);
+ break;
+ case TGSI_FS_DEPTH_LAYOUT_LESS:
+ sel->db_shader_control |=
+ S_02880C_CONSERVATIVE_Z_EXPORT(V_02880C_EXPORT_LESS_THAN_Z);
break;
}
+ /* Pre-compilation. */
if (sscreen->b.debug_flags & DBG_PRECOMPILE) {
struct si_shader_ctx_state state = {sel};
unsigned index = psinfo->input_semantic_index[i];
unsigned interpolate = psinfo->input_interpolate[i];
unsigned param_offset = ps->ps_input_param_offset[i];
-
- if (name == TGSI_SEMANTIC_POSITION ||
- name == TGSI_SEMANTIC_FACE)
- /* Read from preloaded VGPRs, not parameters */
- continue;
-
bcolor:
tmp = 0;
if (!ps)
return;
- input_ena = ps->spi_ps_input_ena;
+ input_ena = ps->config.spi_ps_input_ena;
/* we need to enable at least one of them, otherwise we hang the GPU */
assert(G_0286CC_PERSP_SAMPLE_ENA(input_ena) ||
return 0;
/* This shader doesn't need a scratch buffer */
- if (shader->scratch_bytes_per_wave == 0)
+ if (shader->config.scratch_bytes_per_wave == 0)
return 0;
/* This shader is already configured to use the current
static unsigned si_get_scratch_buffer_bytes_per_wave(struct si_shader *shader)
{
- return shader ? shader->scratch_bytes_per_wave : 0;
+ return shader ? shader->config.scratch_bytes_per_wave : 0;
}
static unsigned si_get_max_scratch_bytes_per_wave(struct si_context *sctx)
si_get_max_scratch_bytes_per_wave(sctx);
unsigned scratch_needed_size = scratch_bytes_per_wave *
sctx->scratch_waves;
+ unsigned spi_tmpring_size;
int r;
if (scratch_needed_size > 0) {
assert((scratch_needed_size & ~0x3FF) == scratch_needed_size &&
"scratch size should already be aligned correctly.");
- sctx->spi_tmpring_size = S_0286E8_WAVES(sctx->scratch_waves) |
- S_0286E8_WAVESIZE(scratch_bytes_per_wave >> 10);
+ spi_tmpring_size = S_0286E8_WAVES(sctx->scratch_waves) |
+ S_0286E8_WAVESIZE(scratch_bytes_per_wave >> 10);
+ if (spi_tmpring_size != sctx->spi_tmpring_size) {
+ sctx->spi_tmpring_size = spi_tmpring_size;
+ sctx->emit_scratch_reloc = true;
+ }
return true;
}
si_update_vgt_shader_config(sctx);
if (sctx->ps_shader.cso) {
+ unsigned db_shader_control =
+ sctx->ps_shader.cso->db_shader_control |
+ S_02880C_KILL_ENABLE(si_get_alpha_test_func(sctx) != PIPE_FUNC_ALWAYS);
+
r = si_shader_select(ctx, &sctx->ps_shader);
if (r)
return false;
si_mark_atom_dirty(sctx, &sctx->spi_ps_input);
}
- if (sctx->ps_db_shader_control != sctx->ps_shader.current->db_shader_control) {
- sctx->ps_db_shader_control = sctx->ps_shader.current->db_shader_control;
+ if (sctx->ps_db_shader_control != db_shader_control) {
+ sctx->ps_db_shader_control = db_shader_control;
si_mark_atom_dirty(sctx, &sctx->db_render_state);
}