#include "si_pipe.h"
#include "si_shader.h"
#include "sid.h"
-#include "../radeon/r600_cs.h"
+#include "radeon/r600_cs.h"
-#include "tgsi/tgsi_parse.h"
-#include "tgsi/tgsi_scan.h"
#include "util/u_format.h"
#include "util/u_format_s3tc.h"
-#include "util/u_framebuffer.h"
-#include "util/u_helpers.h"
#include "util/u_memory.h"
static void si_init_atom(struct r600_atom *atom, struct r600_atom **list_elem,
*list_elem = atom;
}
-uint32_t si_num_banks(struct si_screen *sscreen, unsigned bpe, unsigned tile_split,
- unsigned tile_mode_index)
+uint32_t si_num_banks(struct si_screen *sscreen, struct r600_texture *tex)
{
- if ((sscreen->b.chip_class == CIK) &&
+ if (sscreen->b.chip_class == CIK &&
sscreen->b.info.cik_macrotile_mode_array_valid) {
unsigned index, tileb;
- tileb = 8 * 8 * bpe;
- tileb = MIN2(tile_split, tileb);
+ tileb = 8 * 8 * tex->surface.bpe;
+ tileb = MIN2(tex->surface.tile_split, tileb);
for (index = 0; tileb > 64; index++) {
tileb >>= 1;
return (sscreen->b.info.cik_macrotile_mode_array[index] >> 6) & 0x3;
}
- if ((sscreen->b.chip_class == SI) &&
+ if (sscreen->b.chip_class == SI &&
sscreen->b.info.si_tile_mode_array_valid) {
+ /* Don't use stencil_tiling_index, because num_banks is always
+ * read from the depth mode. */
+ unsigned tile_mode_index = tex->surface.tiling_index[0];
assert(tile_mode_index < 32);
- return (sscreen->b.info.si_tile_mode_array[tile_mode_index] >> 20) & 0x3;
+ return G_009910_NUM_BANKS(sscreen->b.info.si_tile_mode_array[tile_mode_index]);
}
/* The old way. */
}
/*
- * inferred framebuffer and blender state
+ * Inferred framebuffer and blender state.
+ *
+ * One of the reasons this must be derived from the framebuffer state is that:
+ * - The blend state mask is 0xf most of the time.
+ * - The COLOR1 format isn't INVALID because of possible dual-source blending,
+ * so COLOR1 is enabled pretty much all the time.
+ * So CB_TARGET_MASK is the only register that can disable COLOR1.
*/
static void si_update_fb_blend_state(struct si_context *sctx)
{
struct si_pm4_state *pm4;
struct si_state_blend *blend = sctx->queued.named.blend;
- uint32_t mask;
+ uint32_t mask = 0, i;
if (blend == NULL)
return;
- pm4 = si_pm4_alloc_state(sctx);
+ pm4 = CALLOC_STRUCT(si_pm4_state);
if (pm4 == NULL)
return;
- mask = (1ULL << ((unsigned)sctx->framebuffer.state.nr_cbufs * 4)) - 1;
+ /* Enable the four channel bits of every colorbuffer slot that is
+  * actually bound; NULL slots below nr_cbufs stay masked off. */
+ for (i = 0; i < sctx->framebuffer.state.nr_cbufs; i++)
+ if (sctx->framebuffer.state.cbufs[i])
+ /* Unsigned literal: a signed 0xf << 28 (i == 7) would
+  * not be representable in int. */
+ mask |= 0xfu << (4*i);
+ /* Only channels that the blend state also enables may be written. */
mask &= blend->cb_target_mask;
- si_pm4_set_reg(pm4, R_028238_CB_TARGET_MASK, mask);
+ si_pm4_set_reg(pm4, R_028238_CB_TARGET_MASK, mask);
si_pm4_set_state(sctx, fb_blend, pm4);
}
const struct pipe_blend_color *state)
{
struct si_context *sctx = (struct si_context *)ctx;
- struct si_pm4_state *pm4 = si_pm4_alloc_state(sctx);
+ struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
if (pm4 == NULL)
return;
const struct pipe_clip_state *state)
{
struct si_context *sctx = (struct si_context *)ctx;
- struct si_pm4_state *pm4 = si_pm4_alloc_state(sctx);
+ struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
struct pipe_constant_buffer cb;
if (pm4 == NULL)
si_pm4_set_state(sctx, clip, pm4);
}
+#define SIX_BITS 0x3F
+
+/* Emit PA_CL_VS_OUT_CNTL and PA_CL_CLIP_CNTL from the current vertex-stage
+ * shader info and the queued rasterizer state. 'atom' is not used. */
+static void si_emit_clip_regs(struct si_context *sctx, struct r600_atom *atom)
+{
+	struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
+	struct tgsi_shader_info *info = si_get_vs_info(sctx);
+	struct si_shader *vs = si_get_vs_state(sctx);
+	struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
+	unsigned window_space =
+		vs->selector->info.properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION];
+	unsigned clipdist_mask = info->clipdist_writemask;
+	unsigned misc_vec_ena;
+	unsigned ucp_enable = 0;
+	unsigned vs_out_cntl, clip_cntl;
+
+	/* A shader that writes the clip vertex is treated as writing all
+	 * six clip distances. */
+	if (info->writes_clipvertex)
+		clipdist_mask = SIX_BITS;
+
+	/* The misc vector is needed as soon as any of its outputs is written. */
+	misc_vec_ena = info->writes_psize ||
+		       info->writes_edgeflag ||
+		       info->writes_layer;
+
+	vs_out_cntl = S_02881C_USE_VTX_POINT_SIZE(info->writes_psize) |
+		      S_02881C_USE_VTX_EDGE_FLAG(info->writes_edgeflag) |
+		      S_02881C_USE_VTX_RENDER_TARGET_INDX(info->writes_layer) |
+		      S_02881C_VS_OUT_CCDIST0_VEC_ENA((clipdist_mask & 0x0F) != 0) |
+		      S_02881C_VS_OUT_CCDIST1_VEC_ENA((clipdist_mask & 0xF0) != 0) |
+		      S_02881C_VS_OUT_MISC_VEC_ENA(misc_vec_ena) |
+		      (rs->clip_plane_enable & clipdist_mask);
+	r600_write_context_reg(cs, R_02881C_PA_CL_VS_OUT_CNTL, vs_out_cntl);
+
+	/* The clip plane enable bits go into PA_CL_CLIP_CNTL only when the
+	 * shader writes no clip distances at all. */
+	if (!clipdist_mask)
+		ucp_enable = rs->clip_plane_enable & SIX_BITS;
+
+	clip_cntl = rs->pa_cl_clip_cntl |
+		    ucp_enable |
+		    S_028810_CLIP_DISABLE(window_space);
+	r600_write_context_reg(cs, R_028810_PA_CL_CLIP_CNTL, clip_cntl);
+}
+
static void si_set_scissor_states(struct pipe_context *ctx,
unsigned start_slot,
unsigned num_scissors,
return;
}
- pm4 = si_pm4_alloc_state(sctx);
+ pm4 = CALLOC_STRUCT(si_pm4_state);
if (pm4 == NULL)
return;
S_028814_CULL_FRONT(state->rasterizer_discard || (state->cull_face & PIPE_FACE_FRONT) ? 1 : 0) |
S_028814_CULL_BACK(state->rasterizer_discard || (state->cull_face & PIPE_FACE_BACK) ? 1 : 0) |
S_028814_FACE(!state->front_ccw) |
- S_028814_POLY_OFFSET_FRONT_ENABLE(state->offset_tri) |
- S_028814_POLY_OFFSET_BACK_ENABLE(state->offset_tri) |
- S_028814_POLY_OFFSET_PARA_ENABLE(state->offset_tri) |
+ S_028814_POLY_OFFSET_FRONT_ENABLE(util_get_offset(state, state->fill_front)) |
+ S_028814_POLY_OFFSET_BACK_ENABLE(util_get_offset(state, state->fill_back)) |
+ S_028814_POLY_OFFSET_PARA_ENABLE(state->offset_point || state->offset_line) |
S_028814_POLY_MODE(polygon_dual_mode) |
S_028814_POLYMODE_FRONT_PTYPE(si_translate_fill(state->fill_front)) |
S_028814_POLYMODE_BACK_PTYPE(si_translate_fill(state->fill_back));
rs->pa_cl_clip_cntl =
S_028810_PS_UCP_MODE(3) |
+ S_028810_DX_CLIP_SPACE_DEF(state->clip_halfz) |
S_028810_ZCLIP_NEAR_DISABLE(!state->depth_clip) |
S_028810_ZCLIP_FAR_DISABLE(!state->depth_clip) |
S_028810_DX_RASTERIZATION_KILL(state->rasterizer_discard) |
static void si_bind_rs_state(struct pipe_context *ctx, void *state)
{
struct si_context *sctx = (struct si_context *)ctx;
+ struct si_state_rasterizer *old_rs =
+ (struct si_state_rasterizer*)sctx->queued.named.rasterizer; /* rasterizer
+ * queued before this bind; may be NULL (see the !old_rs check below).
+ * NOTE(review): presumably captured here because si_pm4_bind_state
+ * replaces queued.named.rasterizer - confirm. */
struct si_state_rasterizer *rs = (struct si_state_rasterizer *)state;
if (state == NULL)
return;
// TODO
- sctx->sprite_coord_enable = rs->sprite_coord_enable;
sctx->pa_sc_line_stipple = rs->pa_sc_line_stipple;
sctx->pa_su_sc_mode_cntl = rs->pa_su_sc_mode_cntl;
+ if (sctx->framebuffer.nr_samples > 1 &&
+ (!old_rs || old_rs->multisample_enable != rs->multisample_enable))
+ sctx->db_render_state.dirty = true; /* si_emit_db_render_state
+ * masks MASK_EXPORT_ENABLE based on multisample_enable, so it is
+ * re-emitted when that flag changes (or there was no previous
+ * rasterizer) on a framebuffer with more than one sample. */
+
si_pm4_bind_state(sctx, rasterizer, rs);
si_update_fb_rs_state(sctx);
+
+ sctx->clip_regs.dirty = true; /* si_emit_clip_regs reads the queued
+ * rasterizer's clip_plane_enable and pa_cl_clip_cntl */
+ sctx->last_rast_prim = -1; /* reset this so that it gets updated */
}
static void si_delete_rs_state(struct pipe_context *ctx, void *state)
*/
static void si_update_dsa_stencil_ref(struct si_context *sctx)
{
- struct si_pm4_state *pm4 = si_pm4_alloc_state(sctx);
+ struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
struct pipe_stencil_ref *ref = &sctx->stencil_ref;
struct si_state_dsa *dsa = sctx->queued.named.dsa;
struct si_state_dsa *dsa = CALLOC_STRUCT(si_state_dsa);
struct si_pm4_state *pm4 = &dsa->pm4;
unsigned db_depth_control;
- unsigned db_render_control;
uint32_t db_stencil_control = 0;
if (dsa == NULL) {
}
/* misc */
- db_render_control = 0;
si_pm4_set_reg(pm4, R_028800_DB_DEPTH_CONTROL, db_depth_control);
- si_pm4_set_reg(pm4, R_028000_DB_RENDER_CONTROL, db_render_control);
si_pm4_set_reg(pm4, R_02842C_DB_STENCIL_CONTROL, db_stencil_control);
return dsa;
si_pm4_delete_state(sctx, dsa, (struct si_state_dsa *)state);
}
-static void *si_create_db_flush_dsa(struct si_context *sctx, bool copy_depth,
- bool copy_stencil, int sample)
+/* Create the depth-stencil-alpha state used for DB flush passes: an
+ * all-zero pipe_depth_stencil_alpha_state passed to the context's own
+ * create_depth_stencil_alpha_state hook. Returns whatever that hook
+ * returns. */
+static void *si_create_db_flush_dsa(struct si_context *sctx)
{
- struct pipe_depth_stencil_alpha_state dsa;
- struct si_state_dsa *state;
+ /* {0} rather than {}: the empty initializer is not ISO C before C23. */
+ struct pipe_depth_stencil_alpha_state dsa = {0};
- memset(&dsa, 0, sizeof(dsa));
+ return sctx->b.b.create_depth_stencil_alpha_state(&sctx->b.b, &dsa);
+}
- state = sctx->b.b.create_depth_stencil_alpha_state(&sctx->b.b, &dsa);
- if (copy_depth || copy_stencil) {
- si_pm4_set_reg(&state->pm4, R_028000_DB_RENDER_CONTROL,
- S_028000_DEPTH_COPY(copy_depth) |
- S_028000_STENCIL_COPY(copy_stencil) |
- S_028000_COPY_CENTROID(1) |
- S_028000_COPY_SAMPLE(sample));
+/* DB RENDER STATE */
+
+/* Occlusion-query state changed: flag the DB render state atom as dirty.
+ * 'enable' is not consulted here; si_emit_db_render_state checks
+ * sctx->b.num_occlusion_queries itself when the atom is emitted. */
+static void si_set_occlusion_query_state(struct pipe_context *ctx, bool enable)
+{
+	((struct si_context *)ctx)->db_render_state.dirty = true;
+}
+
+static void si_emit_db_render_state(struct si_context *sctx, struct r600_atom *state)
+{ /* Emit the DB render registers: DB_RENDER_CONTROL and DB_COUNT_CONTROL
+ * (as one two-register sequence), then DB_RENDER_OVERRIDE2 and
+ * DB_SHADER_CONTROL. The values come from the depth copy/flush/clear
+ * flags in sctx, the number of active occlusion queries, the
+ * framebuffer sample state and the queued rasterizer.
+ * 'state' is not used. */
+ struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
+ struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
+ unsigned db_shader_control;
+
+ r600_write_context_reg_seq(cs, R_028000_DB_RENDER_CONTROL, 2); /* two
+ * registers follow; each of the next two if-chains emits exactly
+ * one value on every path */
+
+ /* DB_RENDER_CONTROL: a DB->CB copy takes precedence over an in-place
+ * flush, which takes precedence over a depth clear; otherwise 0. */
+ if (sctx->dbcb_depth_copy_enabled ||
+ sctx->dbcb_stencil_copy_enabled) {
+ radeon_emit(cs,
+ S_028000_DEPTH_COPY(sctx->dbcb_depth_copy_enabled) |
+ S_028000_STENCIL_COPY(sctx->dbcb_stencil_copy_enabled) |
+ S_028000_COPY_CENTROID(1) |
+ S_028000_COPY_SAMPLE(sctx->dbcb_copy_sample));
+ } else if (sctx->db_inplace_flush_enabled) {
+ radeon_emit(cs,
+ S_028000_DEPTH_COMPRESS_DISABLE(1) |
+ S_028000_STENCIL_COMPRESS_DISABLE(1));
+ } else if (sctx->db_depth_clear) {
+ radeon_emit(cs, S_028000_DEPTH_CLEAR_ENABLE(1));
} else {
- si_pm4_set_reg(&state->pm4, R_028000_DB_RENDER_CONTROL,
- S_028000_DEPTH_COMPRESS_DISABLE(1) |
- S_028000_STENCIL_COMPRESS_DISABLE(1));
+ radeon_emit(cs, 0);
}
- return state;
+ /* DB_COUNT_CONTROL (occlusion queries): the enabled and disabled
+ * encodings both differ between CIK+ and older chips. */
+ if (sctx->b.num_occlusion_queries > 0) {
+ if (sctx->b.chip_class >= CIK) {
+ radeon_emit(cs,
+ S_028004_PERFECT_ZPASS_COUNTS(1) |
+ S_028004_SAMPLE_RATE(sctx->framebuffer.log_samples) |
+ S_028004_ZPASS_ENABLE(1) |
+ S_028004_SLICE_EVEN_ENABLE(1) |
+ S_028004_SLICE_ODD_ENABLE(1));
+ } else {
+ radeon_emit(cs,
+ S_028004_PERFECT_ZPASS_COUNTS(1) |
+ S_028004_SAMPLE_RATE(sctx->framebuffer.log_samples));
+ }
+ } else {
+ /* Disable occlusion queries. */
+ if (sctx->b.chip_class >= CIK) {
+ radeon_emit(cs, 0);
+ } else {
+ radeon_emit(cs, S_028004_ZPASS_INCREMENT_DISABLE(1));
+ }
+ }
+
+ /* DB_RENDER_OVERRIDE2: written on every emit, either with the
+ * expclear optimization disabled or as 0. */
+ if (sctx->db_depth_disable_expclear) {
+ r600_write_context_reg(cs, R_028010_DB_RENDER_OVERRIDE2,
+ S_028010_DISABLE_ZMASK_EXPCLEAR_OPTIMIZATION(1));
+ } else {
+ r600_write_context_reg(cs, R_028010_DB_RENDER_OVERRIDE2, 0);
+ }
+
+ db_shader_control = S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z) |
+ S_02880C_ALPHA_TO_MASK_DISABLE(sctx->framebuffer.cb0_is_integer) |
+ sctx->ps_db_shader_control;
+
+ /* Disable the gl_SampleMask fragment shader output if MSAA is disabled.
+ * rs may be NULL; in that case only the sample count decides. */
+ if (sctx->framebuffer.nr_samples <= 1 || (rs && !rs->multisample_enable))
+ db_shader_control &= C_02880C_MASK_EXPORT_ENABLE;
+
+ r600_write_context_reg(cs, R_02880C_DB_SHADER_CONTROL,
+ db_shader_control);
}
/*
if ((usage & (PIPE_BIND_RENDER_TARGET |
PIPE_BIND_DISPLAY_TARGET |
PIPE_BIND_SCANOUT |
- PIPE_BIND_SHARED)) &&
+ PIPE_BIND_SHARED |
+ PIPE_BIND_BLENDABLE)) &&
si_is_colorbuffer_format_supported(format)) {
retval |= usage &
(PIPE_BIND_RENDER_TARGET |
PIPE_BIND_DISPLAY_TARGET |
PIPE_BIND_SCANOUT |
PIPE_BIND_SHARED);
+ if (!util_format_is_pure_integer(format) &&
+ !util_format_is_depth_or_stencil(format))
+ retval |= usage & PIPE_BIND_BLENDABLE;
}
if ((usage & PIPE_BIND_DEPTH_STENCIL) &&
struct si_screen *sscreen = sctx->screen;
struct r600_texture *rtex = (struct r600_texture*)surf->base.texture;
unsigned level = surf->base.u.tex.level;
- unsigned pitch, slice, format, tile_mode_index, array_mode;
+ struct radeon_surface_level *levelinfo = &rtex->surface.level[level];
+ unsigned format, tile_mode_index, array_mode;
unsigned macro_aspect, tile_split, stile_split, bankh, bankw, nbanks, pipe_config;
uint32_t z_info, s_info, db_depth_info;
uint64_t z_offs, s_offs;
z_offs += rtex->surface.level[level].offset;
s_offs += rtex->surface.stencil_level[level].offset;
- pitch = (rtex->surface.level[level].nblk_x / 8) - 1;
- slice = (rtex->surface.level[level].nblk_x * rtex->surface.level[level].nblk_y) / 64;
- if (slice) {
- slice = slice - 1;
- }
-
db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(1);
z_info = S_028040_FORMAT(format);
macro_aspect = cik_macro_tile_aspect(macro_aspect);
bankw = cik_bank_wh(bankw);
bankh = cik_bank_wh(bankh);
- nbanks = si_num_banks(sscreen, rtex->surface.bpe, rtex->surface.tile_split,
- ~0);
+ nbanks = si_num_banks(sscreen, rtex);
tile_mode_index = si_tile_mode_index(rtex, level, false);
pipe_config = cik_db_pipe_config(sscreen, tile_mode_index);
/* HiZ aka depth buffer htile */
/* use htile only for first level */
if (rtex->htile_buffer && !level) {
- const struct util_format_description *fmt_desc;
-
- z_info |= S_028040_TILE_SURFACE_ENABLE(1);
+ z_info |= S_028040_TILE_SURFACE_ENABLE(1) |
+ S_028040_ALLOW_EXPCLEAR(1);
/* This is optimal for the clear value of 1.0 and using
* the LESS and LEQUAL test functions. Set this to 0
* clearing. */
z_info |= S_028040_ZRANGE_PRECISION(1);
- fmt_desc = util_format_description(rtex->resource.b.b.format);
- if (!util_format_has_stencil(fmt_desc)) {
- /* Use all of the htile_buffer for depth */
- s_info |= S_028044_TILE_STENCIL_DISABLE(1);
- }
+ /* Use all of the htile_buffer for depth, because we don't
+ * use HTILE for stencil because of FAST_STENCIL_DISABLE. */
+ s_info |= S_028044_TILE_STENCIL_DISABLE(1);
uint64_t va = rtex->htile_buffer->gpu_address;
db_htile_data_base = va >> 8;
db_htile_surface = 0;
}
+ assert(levelinfo->nblk_x % 8 == 0 && levelinfo->nblk_y % 8 == 0);
+
surf->db_depth_view = S_028008_SLICE_START(surf->base.u.tex.first_layer) |
S_028008_SLICE_MAX(surf->base.u.tex.last_layer);
surf->db_htile_data_base = db_htile_data_base;
surf->db_stencil_info = s_info;
surf->db_depth_base = z_offs >> 8;
surf->db_stencil_base = s_offs >> 8;
- surf->db_depth_size = S_028058_PITCH_TILE_MAX(pitch);
- surf->db_depth_slice = S_02805C_SLICE_TILE_MAX(slice);
+ surf->db_depth_size = S_028058_PITCH_TILE_MAX((levelinfo->nblk_x / 8) - 1) |
+ S_028058_HEIGHT_TILE_MAX((levelinfo->nblk_y / 8) - 1);
+ surf->db_depth_slice = S_02805C_SLICE_TILE_MAX((levelinfo->nblk_x *
+ levelinfo->nblk_y) / 64 - 1);
surf->db_htile_surface = db_htile_surface;
surf->pa_su_poly_offset_db_fmt_cntl = pa_su_poly_offset_db_fmt_cntl;
struct pipe_constant_buffer constbuf = {0};
struct r600_surface *surf = NULL;
struct r600_texture *rtex;
+ bool old_cb0_is_integer = sctx->framebuffer.cb0_is_integer;
+ unsigned old_nr_samples = sctx->framebuffer.nr_samples;
int i;
if (sctx->framebuffer.state.nr_cbufs) {
sctx->framebuffer.cb0_is_integer = state->nr_cbufs && state->cbufs[0] &&
util_format_is_pure_integer(state->cbufs[0]->format);
+ if (sctx->framebuffer.cb0_is_integer != old_cb0_is_integer)
+ sctx->db_render_state.dirty = true;
+
for (i = 0; i < state->nr_cbufs; i++) {
if (!state->cbufs[i])
continue;
si_update_fb_blend_state(sctx);
sctx->framebuffer.atom.num_dw = state->nr_cbufs*15 + (8 - state->nr_cbufs)*3;
- sctx->framebuffer.atom.num_dw += state->zsbuf ? 23 : 4;
+ sctx->framebuffer.atom.num_dw += state->zsbuf ? 26 : 4;
sctx->framebuffer.atom.num_dw += 3; /* WINDOW_SCISSOR_BR */
sctx->framebuffer.atom.num_dw += 18; /* MSAA sample locations */
sctx->framebuffer.atom.dirty = true;
- sctx->msaa_config.dirty = true;
- /* Set sample locations as fragment shader constants. */
- switch (sctx->framebuffer.nr_samples) {
- case 1:
- constbuf.user_buffer = sctx->b.sample_locations_1x;
- break;
- case 2:
- constbuf.user_buffer = sctx->b.sample_locations_2x;
- break;
- case 4:
- constbuf.user_buffer = sctx->b.sample_locations_4x;
- break;
- case 8:
- constbuf.user_buffer = sctx->b.sample_locations_8x;
- break;
- case 16:
- constbuf.user_buffer = sctx->b.sample_locations_16x;
- break;
- default:
- assert(0);
+ if (sctx->framebuffer.nr_samples != old_nr_samples) {
+ sctx->msaa_config.dirty = true;
+ sctx->db_render_state.dirty = true;
+
+ /* Set sample locations as fragment shader constants. */
+ switch (sctx->framebuffer.nr_samples) {
+ case 1:
+ constbuf.user_buffer = sctx->b.sample_locations_1x;
+ break;
+ case 2:
+ constbuf.user_buffer = sctx->b.sample_locations_2x;
+ break;
+ case 4:
+ constbuf.user_buffer = sctx->b.sample_locations_4x;
+ break;
+ case 8:
+ constbuf.user_buffer = sctx->b.sample_locations_8x;
+ break;
+ case 16:
+ constbuf.user_buffer = sctx->b.sample_locations_16x;
+ break;
+ default:
+ assert(0);
+ }
+ constbuf.buffer_size = sctx->framebuffer.nr_samples * 2 * 4;
+ ctx->set_constant_buffer(ctx, PIPE_SHADER_FRAGMENT,
+ SI_DRIVER_STATE_CONST_BUF, &constbuf);
}
- constbuf.buffer_size = sctx->framebuffer.nr_samples * 2 * 4;
- ctx->set_constant_buffer(ctx, PIPE_SHADER_FRAGMENT,
- SI_DRIVER_STATE_CONST_BUF, &constbuf);
}
static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom *atom)
radeon_emit(cs, zb->db_depth_slice); /* R_02805C_DB_DEPTH_SLICE */
r600_write_context_reg(cs, R_028ABC_DB_HTILE_SURFACE, zb->db_htile_surface);
+ r600_write_context_reg(cs, R_02802C_DB_DEPTH_CLEAR, fui(rtex->depth_clear_value));
r600_write_context_reg(cs, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL,
zb->pa_su_poly_offset_db_fmt_cntl);
} else {
sctx->msaa_config.dirty = true;
}
-/*
- * shaders
- */
-
-/* Compute the key for the hw shader variant */
-static INLINE void si_shader_selector_key(struct pipe_context *ctx,
- struct si_pipe_shader_selector *sel,
- union si_shader_key *key)
-{
- struct si_context *sctx = (struct si_context *)ctx;
- memset(key, 0, sizeof(*key));
-
- if ((sel->type == PIPE_SHADER_VERTEX || sel->type == PIPE_SHADER_GEOMETRY) &&
- sctx->queued.named.rasterizer) {
- if (sctx->queued.named.rasterizer->clip_plane_enable & 0xf0)
- key->vs.ucps_enabled |= 0x2;
- if (sctx->queued.named.rasterizer->clip_plane_enable & 0xf)
- key->vs.ucps_enabled |= 0x1;
- }
-
- if (sel->type == PIPE_SHADER_VERTEX) {
- unsigned i;
- if (!sctx->vertex_elements)
- return;
-
- for (i = 0; i < sctx->vertex_elements->count; ++i)
- key->vs.instance_divisors[i] = sctx->vertex_elements->elements[i].instance_divisor;
-
- key->vs.as_es = sctx->gs_shader != NULL;
- } else if (sel->type == PIPE_SHADER_FRAGMENT) {
- if (sel->fs_write_all)
- key->ps.nr_cbufs = sctx->framebuffer.state.nr_cbufs;
- key->ps.export_16bpc = sctx->framebuffer.export_16bpc;
-
- if (sctx->queued.named.rasterizer) {
- key->ps.color_two_side = sctx->queued.named.rasterizer->two_side;
- key->ps.flatshade = sctx->queued.named.rasterizer->flatshade;
- key->ps.interp_at_sample = sctx->framebuffer.nr_samples > 1 &&
- sctx->ps_iter_samples == sctx->framebuffer.nr_samples;
-
- if (sctx->queued.named.blend) {
- key->ps.alpha_to_one = sctx->queued.named.blend->alpha_to_one &&
- sctx->queued.named.rasterizer->multisample_enable &&
- !sctx->framebuffer.cb0_is_integer;
- }
- }
- if (sctx->queued.named.dsa) {
- key->ps.alpha_func = sctx->queued.named.dsa->alpha_func;
-
- /* Alpha-test should be disabled if colorbuffer 0 is integer. */
- if (sctx->framebuffer.cb0_is_integer)
- key->ps.alpha_func = PIPE_FUNC_ALWAYS;
- } else {
- key->ps.alpha_func = PIPE_FUNC_ALWAYS;
- }
- }
-}
-
-/* Select the hw shader variant depending on the current state. */
-int si_shader_select(struct pipe_context *ctx,
- struct si_pipe_shader_selector *sel)
-{
- union si_shader_key key;
- struct si_pipe_shader * shader = NULL;
- int r;
-
- si_shader_selector_key(ctx, sel, &key);
-
- /* Check if we don't need to change anything.
- * This path is also used for most shaders that don't need multiple
- * variants, it will cost just a computation of the key and this
- * test. */
- if (likely(sel->current && memcmp(&sel->current->key, &key, sizeof(key)) == 0)) {
- return 0;
- }
-
- /* lookup if we have other variants in the list */
- if (sel->num_shaders > 1) {
- struct si_pipe_shader *p = sel->current, *c = p->next_variant;
-
- while (c && memcmp(&c->key, &key, sizeof(key)) != 0) {
- p = c;
- c = c->next_variant;
- }
-
- if (c) {
- p->next_variant = c->next_variant;
- shader = c;
- }
- }
-
- if (shader) {
- shader->next_variant = sel->current;
- sel->current = shader;
- } else {
- shader = CALLOC(1, sizeof(struct si_pipe_shader));
- shader->selector = sel;
- shader->key = key;
-
- shader->next_variant = sel->current;
- sel->current = shader;
- r = si_pipe_shader_create(ctx, shader);
- if (unlikely(r)) {
- R600_ERR("Failed to build shader variant (type=%u) %d\n",
- sel->type, r);
- sel->current = NULL;
- FREE(shader);
- return r;
- }
- sel->num_shaders++;
- }
-
- return 0;
-}
-
-static void *si_create_shader_state(struct pipe_context *ctx,
- const struct pipe_shader_state *state,
- unsigned pipe_shader_type)
-{
- struct si_pipe_shader_selector *sel = CALLOC_STRUCT(si_pipe_shader_selector);
- int r;
-
- sel->type = pipe_shader_type;
- sel->tokens = tgsi_dup_tokens(state->tokens);
- sel->so = state->stream_output;
-
- if (pipe_shader_type == PIPE_SHADER_FRAGMENT) {
- struct tgsi_shader_info info;
-
- tgsi_scan_shader(state->tokens, &info);
- sel->fs_write_all = info.color0_writes_all_cbufs;
- }
-
- r = si_shader_select(ctx, sel);
- if (r) {
- free(sel);
- return NULL;
- }
-
- return sel;
-}
-
-static void *si_create_fs_state(struct pipe_context *ctx,
- const struct pipe_shader_state *state)
-{
- return si_create_shader_state(ctx, state, PIPE_SHADER_FRAGMENT);
-}
-
-static void *si_create_gs_state(struct pipe_context *ctx,
- const struct pipe_shader_state *state)
-{
- return si_create_shader_state(ctx, state, PIPE_SHADER_GEOMETRY);
-}
-
-static void *si_create_vs_state(struct pipe_context *ctx,
- const struct pipe_shader_state *state)
-{
- return si_create_shader_state(ctx, state, PIPE_SHADER_VERTEX);
-}
-
-static void si_bind_vs_shader(struct pipe_context *ctx, void *state)
-{
- struct si_context *sctx = (struct si_context *)ctx;
- struct si_pipe_shader_selector *sel = state;
-
- if (sctx->vs_shader == sel)
- return;
-
- if (!sel || !sel->current)
- return;
-
- sctx->vs_shader = sel;
-}
-
-static void si_bind_gs_shader(struct pipe_context *ctx, void *state)
-{
- struct si_context *sctx = (struct si_context *)ctx;
- struct si_pipe_shader_selector *sel = state;
-
- if (sctx->gs_shader == sel)
- return;
-
- sctx->gs_shader = sel;
-}
-
-static void si_bind_ps_shader(struct pipe_context *ctx, void *state)
-{
- struct si_context *sctx = (struct si_context *)ctx;
- struct si_pipe_shader_selector *sel = state;
-
- /* skip if supplied shader is one already in use */
- if (sctx->ps_shader == sel)
- return;
-
- /* use dummy shader if supplied shader is corrupt */
- if (!sel || !sel->current)
- sel = sctx->dummy_pixel_shader;
-
- sctx->ps_shader = sel;
-}
-
-static void si_delete_shader_selector(struct pipe_context *ctx,
- struct si_pipe_shader_selector *sel)
-{
- struct si_context *sctx = (struct si_context *)ctx;
- struct si_pipe_shader *p = sel->current, *c;
-
- while (p) {
- c = p->next_variant;
- if (sel->type == PIPE_SHADER_GEOMETRY)
- si_pm4_delete_state(sctx, gs, p->pm4);
- else if (sel->type == PIPE_SHADER_FRAGMENT)
- si_pm4_delete_state(sctx, ps, p->pm4);
- else if (p->key.vs.as_es)
- si_pm4_delete_state(sctx, es, p->pm4);
- else
- si_pm4_delete_state(sctx, vs, p->pm4);
- si_pipe_shader_destroy(ctx, p);
- free(p);
- p = c;
- }
-
- free(sel->tokens);
- free(sel);
- }
-
-static void si_delete_vs_shader(struct pipe_context *ctx, void *state)
-{
- struct si_context *sctx = (struct si_context *)ctx;
- struct si_pipe_shader_selector *sel = (struct si_pipe_shader_selector *)state;
-
- if (sctx->vs_shader == sel) {
- sctx->vs_shader = NULL;
- }
-
- si_delete_shader_selector(ctx, sel);
-}
-
-static void si_delete_gs_shader(struct pipe_context *ctx, void *state)
-{
- struct si_context *sctx = (struct si_context *)ctx;
- struct si_pipe_shader_selector *sel = (struct si_pipe_shader_selector *)state;
-
- if (sctx->gs_shader == sel) {
- sctx->gs_shader = NULL;
- }
-
- si_delete_shader_selector(ctx, sel);
-}
-
-static void si_delete_ps_shader(struct pipe_context *ctx, void *state)
-{
- struct si_context *sctx = (struct si_context *)ctx;
- struct si_pipe_shader_selector *sel = (struct si_pipe_shader_selector *)state;
-
- if (sctx->ps_shader == sel) {
- sctx->ps_shader = NULL;
- }
-
- si_delete_shader_selector(ctx, sel);
-}
-
/*
* Samplers
*/
const struct pipe_sampler_view *state)
{
struct si_context *sctx = (struct si_context*)ctx;
- struct si_pipe_sampler_view *view = CALLOC_STRUCT(si_pipe_sampler_view);
+ struct si_sampler_view *view = CALLOC_STRUCT(si_sampler_view);
struct r600_texture *tmp = (struct r600_texture*)texture;
const struct util_format_description *desc;
unsigned format, num_format;
static void si_sampler_view_destroy(struct pipe_context *ctx,
struct pipe_sampler_view *state)
{
- struct si_pipe_sampler_view *view = (struct si_pipe_sampler_view *)state;
+ struct si_sampler_view *view = (struct si_sampler_view *)state;
if (view->resource->b.b.target == PIPE_BUFFER)
LIST_DELINIT(&view->list);
static void *si_create_sampler_state(struct pipe_context *ctx,
const struct pipe_sampler_state *state)
{
- struct si_pipe_sampler_state *rstate = CALLOC_STRUCT(si_pipe_sampler_state);
+ struct si_sampler_state *rstate = CALLOC_STRUCT(si_sampler_state);
unsigned aniso_flag_offset = state->max_anisotropy > 1 ? 2 : 0;
unsigned border_color_type;
static void si_set_border_colors(struct si_context *sctx, unsigned count,
void **states)
{
- struct si_pipe_sampler_state **rstates = (struct si_pipe_sampler_state **)states;
+ struct si_sampler_state **rstates = (struct si_sampler_state **)states;
uint32_t *border_color_table = NULL;
int i, j;
}
if (border_color_table) {
- struct si_pm4_state *pm4 = si_pm4_alloc_state(sctx);
+ struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
uint64_t va_offset = sctx->border_color_table->gpu_address;
return si_create_blend_state_mode(&sctx->b.b, &blend, mode);
}
-static void si_set_occlusion_query_state(struct pipe_context *ctx, bool enable)
-{
- /* XXX Turn this into a proper state. Right now the queries are
- * enabled in draw_vbo, which snoops r600_common_context to see
- * if any occlusion queries are active. */
-}
-
static void si_need_gfx_cs_space(struct pipe_context *ctx, unsigned num_dw,
bool include_draw_vbo)
{
void si_init_state_functions(struct si_context *sctx)
{
- int i;
-
si_init_atom(&sctx->framebuffer.atom, &sctx->atoms.s.framebuffer, si_emit_framebuffer_state, 0);
+ si_init_atom(&sctx->db_render_state, &sctx->atoms.s.db_render_state, si_emit_db_render_state, 10);
+ si_init_atom(&sctx->clip_regs, &sctx->atoms.s.clip_regs, si_emit_clip_regs, 6);
sctx->b.b.create_blend_state = si_create_blend_state;
sctx->b.b.bind_blend_state = si_bind_blend_state;
sctx->b.b.bind_depth_stencil_alpha_state = si_bind_dsa_state;
sctx->b.b.delete_depth_stencil_alpha_state = si_delete_dsa_state;
- for (i = 0; i < 8; i++) {
- sctx->custom_dsa_flush_depth_stencil[i] = si_create_db_flush_dsa(sctx, true, true, i);
- sctx->custom_dsa_flush_depth[i] = si_create_db_flush_dsa(sctx, true, false, i);
- sctx->custom_dsa_flush_stencil[i] = si_create_db_flush_dsa(sctx, false, true, i);
- }
- sctx->custom_dsa_flush_inplace = si_create_db_flush_dsa(sctx, false, false, 0);
+ sctx->custom_dsa_flush = si_create_db_flush_dsa(sctx);
sctx->custom_blend_resolve = si_create_blend_custom(sctx, V_028808_CB_RESOLVE);
sctx->custom_blend_decompress = si_create_blend_custom(sctx, V_028808_CB_FMASK_DECOMPRESS);
sctx->custom_blend_fastclear = si_create_blend_custom(sctx, V_028808_CB_ELIMINATE_FAST_CLEAR);
sctx->b.b.set_framebuffer_state = si_set_framebuffer_state;
sctx->b.b.get_sample_position = cayman_get_sample_position;
- sctx->b.b.create_vs_state = si_create_vs_state;
- sctx->b.b.create_fs_state = si_create_fs_state;
- sctx->b.b.bind_vs_state = si_bind_vs_shader;
- sctx->b.b.bind_fs_state = si_bind_ps_shader;
- sctx->b.b.delete_vs_state = si_delete_vs_shader;
- sctx->b.b.delete_fs_state = si_delete_ps_shader;
-
- sctx->b.b.create_gs_state = si_create_gs_state;
- sctx->b.b.bind_gs_state = si_bind_gs_shader;
- sctx->b.b.delete_gs_state = si_delete_gs_shader;
-
sctx->b.b.create_sampler_state = si_create_sampler_state;
sctx->b.b.bind_sampler_states = si_bind_sampler_states;
sctx->b.b.delete_sampler_state = si_delete_sampler_state;
sctx->b.b.draw_vbo = si_draw_vbo;
}
+static void
+si_write_harvested_raster_configs(struct si_context *sctx,
+ struct si_pm4_state *pm4,
+ unsigned raster_config)
+{ /* Write one PA_SC_RASTER_CONFIG value per shader engine (SE) into
+ * 'pm4', starting from the chip default 'raster_config' and patching
+ * the SE_MAP, PKR_MAP and RB_MAP_PKR0/1 fields wherever
+ * si_backend_enabled_mask shows that one half of a pair (SE, packer or
+ * render backend) has no enabled backend: MAP_3 is selected when the
+ * first half is empty, MAP_0 when the second half is empty.
+ * si_init_config calls this only when the mask is non-zero and has
+ * fewer bits set than r600_num_backends.
+ *
+ * sctx:          context; only sctx->screen->b.info is read.
+ * pm4:           state that receives the register writes.
+ * raster_config: default PA_SC_RASTER_CONFIG value for the chip. */
+ unsigned sh_per_se = MAX2(sctx->screen->b.info.max_sh_per_se, 1);
+ unsigned num_se = MAX2(sctx->screen->b.info.max_se, 1);
+ unsigned rb_mask = sctx->screen->b.info.si_backend_enabled_mask;
+ unsigned num_rb = sctx->screen->b.info.r600_num_backends;
+ unsigned rb_per_pkr = num_rb / num_se / sh_per_se;
+ unsigned rb_per_se = num_rb / num_se;
+ unsigned se0_mask = (1 << rb_per_se) - 1; /* lowest rb_per_se bits of rb_mask: SE0 */
+ unsigned se1_mask = se0_mask << rb_per_se; /* next rb_per_se bits: SE1 */
+ unsigned se;
+
+ assert(num_se == 1 || num_se == 2);
+ assert(sh_per_se == 1 || sh_per_se == 2);
+ assert(rb_per_pkr == 1 || rb_per_pkr == 2);
+
+ /* XXX: I can't figure out what the *_XSEL and *_YSEL
+ * fields are for, so I'm leaving them as their default
+ * values. */
+
+ se0_mask &= rb_mask;
+ se1_mask &= rb_mask;
+ if (num_se == 2 && (!se0_mask || !se1_mask)) { /* one SE has no enabled backend */
+ raster_config &= C_028350_SE_MAP;
+
+ if (!se0_mask) {
+ raster_config |=
+ S_028350_SE_MAP(V_028350_RASTER_CONFIG_SE_MAP_3);
+ } else {
+ raster_config |=
+ S_028350_SE_MAP(V_028350_RASTER_CONFIG_SE_MAP_0);
+ }
+ }
+
+ for (se = 0; se < num_se; se++) {
+ unsigned raster_config_se = raster_config; /* per-SE copy; the SE_MAP fix above is shared */
+ unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se); /* first packer's backends within this SE */
+ unsigned pkr1_mask = pkr0_mask << rb_per_pkr; /* second packer's backends */
+
+ pkr0_mask &= rb_mask;
+ pkr1_mask &= rb_mask;
+ if (sh_per_se == 2 && (!pkr0_mask || !pkr1_mask)) { /* one packer has no enabled backend */
+ raster_config_se &= C_028350_PKR_MAP;
+
+ if (!pkr0_mask) {
+ raster_config_se |=
+ S_028350_PKR_MAP(V_028350_RASTER_CONFIG_PKR_MAP_3);
+ } else {
+ raster_config_se |=
+ S_028350_PKR_MAP(V_028350_RASTER_CONFIG_PKR_MAP_0);
+ }
+ }
+
+ if (rb_per_pkr == 2) {
+ unsigned rb0_mask = 1 << (se * rb_per_se); /* first backend of packer 0 */
+ unsigned rb1_mask = rb0_mask << 1; /* second backend of packer 0 */
+
+ rb0_mask &= rb_mask;
+ rb1_mask &= rb_mask;
+ if (!rb0_mask || !rb1_mask) {
+ raster_config_se &= C_028350_RB_MAP_PKR0;
+
+ if (!rb0_mask) {
+ raster_config_se |=
+ S_028350_RB_MAP_PKR0(V_028350_RASTER_CONFIG_RB_MAP_3);
+ } else {
+ raster_config_se |=
+ S_028350_RB_MAP_PKR0(V_028350_RASTER_CONFIG_RB_MAP_0);
+ }
+ }
+
+ if (sh_per_se == 2) {
+ rb0_mask = 1 << (se * rb_per_se + rb_per_pkr); /* same check for the two backends of packer 1 */
+ rb1_mask = rb0_mask << 1;
+ rb0_mask &= rb_mask;
+ rb1_mask &= rb_mask;
+ if (!rb0_mask || !rb1_mask) {
+ raster_config_se &= C_028350_RB_MAP_PKR1;
+
+ if (!rb0_mask) {
+ raster_config_se |=
+ S_028350_RB_MAP_PKR1(V_028350_RASTER_CONFIG_RB_MAP_3);
+ } else {
+ raster_config_se |=
+ S_028350_RB_MAP_PKR1(V_028350_RASTER_CONFIG_RB_MAP_0);
+ }
+ }
+ }
+ }
+
+ si_pm4_set_reg(pm4, GRBM_GFX_INDEX,
+ SE_INDEX(se) | SH_BROADCAST_WRITES |
+ INSTANCE_BROADCAST_WRITES); /* NOTE(review): presumably makes the
+ * following register write land on SE 'se' only - confirm */
+ si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, raster_config_se);
+ }
+
+ si_pm4_set_reg(pm4, GRBM_GFX_INDEX,
+ SE_BROADCAST_WRITES | SH_BROADCAST_WRITES |
+ INSTANCE_BROADCAST_WRITES); /* finish with all three broadcast flags set */
+}
+
void si_init_config(struct si_context *sctx)
{
- struct si_pm4_state *pm4 = si_pm4_alloc_state(sctx);
+ struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
if (pm4 == NULL)
return;
si_pm4_set_reg(pm4, R_028B90_VGT_GS_INSTANCE_CNT, 0);
si_pm4_set_reg(pm4, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0);
- if (sctx->b.chip_class == SI) {
- si_pm4_set_reg(pm4, R_028AA8_IA_MULTI_VGT_PARAM,
- S_028AA8_SWITCH_ON_EOP(1) |
- S_028AA8_PARTIAL_VS_WAVE_ON(1) |
- S_028AA8_PRIMGROUP_SIZE(63));
- }
si_pm4_set_reg(pm4, R_028AB4_VGT_REUSE_OFF, 0x00000000);
si_pm4_set_reg(pm4, R_028AB8_VGT_VTX_CNT_EN, 0x0);
if (sctx->b.chip_class < CIK)
break;
}
} else {
+ unsigned rb_mask = sctx->screen->b.info.si_backend_enabled_mask;
+ unsigned num_rb = sctx->screen->b.info.r600_num_backends;
+ unsigned raster_config;
+
switch (sctx->screen->b.family) {
case CHIP_TAHITI:
case CHIP_PITCAIRN:
- si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x2a00126a);
+ raster_config = 0x2a00126a;
break;
case CHIP_VERDE:
- si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x0000124a);
+ raster_config = 0x0000124a;
break;
case CHIP_OLAND:
- si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x00000082);
+ raster_config = 0x00000082;
break;
case CHIP_HAINAN:
- si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x00000000);
+ raster_config = 0x00000000;
break;
default:
- si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x00000000);
+ fprintf(stderr,
+ "radeonsi: Unknown GPU, using 0 for raster_config\n");
+ raster_config = 0x00000000;
break;
}
+
+ /* Always use the default config when all backends are enabled
+ * (or when we failed to determine the enabled backends).
+ */
+ if (!rb_mask || util_bitcount(rb_mask) >= num_rb) {
+ si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG,
+ raster_config);
+ } else {
+ si_write_harvested_raster_configs(sctx, pm4, raster_config);
+ }
}
si_pm4_set_reg(pm4, R_028204_PA_SC_WINDOW_SCISSOR_TL, S_028204_WINDOW_OFFSET_DISABLE(1));
si_pm4_set_reg(pm4, R_028230_PA_SC_EDGERULE, 0xAAAAAAAA);
si_pm4_set_reg(pm4, R_0282D0_PA_SC_VPORT_ZMIN_0, 0x00000000);
si_pm4_set_reg(pm4, R_0282D4_PA_SC_VPORT_ZMAX_0, 0x3F800000);
- si_pm4_set_reg(pm4, R_028818_PA_CL_VTE_CNTL, 0x0000043F);
si_pm4_set_reg(pm4, R_028820_PA_CL_NANINF_CNTL, 0x00000000);
si_pm4_set_reg(pm4, R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, 0x3F800000);
si_pm4_set_reg(pm4, R_028BEC_PA_CL_GB_VERT_DISC_ADJ, 0x3F800000);
si_pm4_set_reg(pm4, R_028020_DB_DEPTH_BOUNDS_MIN, 0x00000000);
si_pm4_set_reg(pm4, R_028024_DB_DEPTH_BOUNDS_MAX, 0x00000000);
si_pm4_set_reg(pm4, R_028028_DB_STENCIL_CLEAR, 0x00000000);
- si_pm4_set_reg(pm4, R_02802C_DB_DEPTH_CLEAR, 0x3F800000);
si_pm4_set_reg(pm4, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0);
si_pm4_set_reg(pm4, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0);
si_pm4_set_reg(pm4, R_028AC8_DB_PRELOAD_CONTROL, 0x0);
+
+ /* There is a hang if stencil is used and fast stencil is enabled
+ * regardless of whether HTILE is depth-only or not.
+ */
si_pm4_set_reg(pm4, R_02800C_DB_RENDER_OVERRIDE,
S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) |
- S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE));
+ S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE) |
+ S_02800C_FAST_STENCIL_DISABLE(1));
+
si_pm4_set_reg(pm4, R_028400_VGT_MAX_VTX_INDX, ~0);
si_pm4_set_reg(pm4, R_028404_VGT_MIN_VTX_INDX, 0);
si_pm4_set_reg(pm4, R_028408_VGT_INDX_OFFSET, 0);