#include "si_pipe.h"
#include "si_shader.h"
#include "sid.h"
-#include "../radeon/r600_cs.h"
+#include "radeon/r600_cs.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_scan.h"
#include "util/u_framebuffer.h"
#include "util/u_helpers.h"
#include "util/u_memory.h"
+#include "util/u_simple_shaders.h"
static void si_init_atom(struct r600_atom *atom, struct r600_atom **list_elem,
void (*emit)(struct si_context *ctx, struct r600_atom *state),
*list_elem = atom;
}
-static uint32_t cik_num_banks(struct si_screen *sscreen, unsigned bpe, unsigned tile_split)
+uint32_t si_num_banks(struct si_screen *sscreen, struct r600_texture *tex)
{
- if (sscreen->b.info.cik_macrotile_mode_array_valid) {
+ if (sscreen->b.chip_class == CIK &&
+ sscreen->b.info.cik_macrotile_mode_array_valid) {
unsigned index, tileb;
- tileb = 8 * 8 * bpe;
- tileb = MIN2(tile_split, tileb);
+ tileb = 8 * 8 * tex->surface.bpe;
+ tileb = MIN2(tex->surface.tile_split, tileb);
for (index = 0; tileb > 64; index++) {
tileb >>= 1;
}
-
assert(index < 16);
return (sscreen->b.info.cik_macrotile_mode_array[index] >> 6) & 0x3;
}
+ if (sscreen->b.chip_class == SI &&
+ sscreen->b.info.si_tile_mode_array_valid) {
+ /* Don't use stencil_tiling_index, because num_banks is always
+ * read from the depth mode. */
+ unsigned tile_mode_index = tex->surface.tiling_index[0];
+ assert(tile_mode_index < 32);
+
+ return G_009910_NUM_BANKS(sscreen->b.info.si_tile_mode_array[tile_mode_index]);
+ }
+
/* The old way. */
switch (sscreen->b.tiling_info.num_banks) {
case 2:
cb.user_buffer = state->ucp;
cb.buffer_offset = 0;
cb.buffer_size = 4*4*8;
- ctx->set_constant_buffer(ctx, PIPE_SHADER_VERTEX, NUM_PIPE_CONST_BUFFERS, &cb);
+ ctx->set_constant_buffer(ctx, PIPE_SHADER_VERTEX, SI_DRIVER_STATE_CONST_BUF, &cb);
pipe_resource_reference(&cb.buffer, NULL);
si_pm4_set_state(sctx, clip, pm4);
const struct pipe_scissor_state *state)
{
struct si_context *sctx = (struct si_context *)ctx;
- struct si_pm4_state *pm4 = si_pm4_alloc_state(sctx);
+ struct si_state_scissor *scissor = CALLOC_STRUCT(si_state_scissor);
+ struct si_pm4_state *pm4 = &scissor->pm4;
- if (pm4 == NULL)
+ if (scissor == NULL)
return;
+ scissor->scissor = *state;
si_pm4_set_reg(pm4, R_028250_PA_SC_VPORT_SCISSOR_0_TL,
S_028250_TL_X(state->minx) | S_028250_TL_Y(state->miny) |
S_028250_WINDOW_OFFSET_DISABLE(1));
si_pm4_set_reg(pm4, R_028254_PA_SC_VPORT_SCISSOR_0_BR,
S_028254_BR_X(state->maxx) | S_028254_BR_Y(state->maxy));
- si_pm4_set_state(sctx, scissor, pm4);
+ si_pm4_set_state(sctx, scissor, scissor);
}
static void si_set_viewport_states(struct pipe_context *ctx,
struct si_state_dsa *dsa = CALLOC_STRUCT(si_state_dsa);
struct si_pm4_state *pm4 = &dsa->pm4;
unsigned db_depth_control;
- unsigned db_render_control;
uint32_t db_stencil_control = 0;
if (dsa == NULL) {
}
/* misc */
- db_render_control = 0;
si_pm4_set_reg(pm4, R_028800_DB_DEPTH_CONTROL, db_depth_control);
- si_pm4_set_reg(pm4, R_028000_DB_RENDER_CONTROL, db_render_control);
si_pm4_set_reg(pm4, R_02842C_DB_STENCIL_CONTROL, db_stencil_control);
return dsa;
si_pm4_delete_state(sctx, dsa, (struct si_state_dsa *)state);
}
-static void *si_create_db_flush_dsa(struct si_context *sctx, bool copy_depth,
- bool copy_stencil, int sample)
+static void *si_create_db_flush_dsa(struct si_context *sctx)
+{
+ struct pipe_depth_stencil_alpha_state dsa = {};
+
+ return sctx->b.b.create_depth_stencil_alpha_state(&sctx->b.b, &dsa);
+}
+
+/* DB RENDER STATE */
+
+static void si_set_occlusion_query_state(struct pipe_context *ctx, bool enable)
{
- struct pipe_depth_stencil_alpha_state dsa;
- struct si_state_dsa *state;
+ struct si_context *sctx = (struct si_context*)ctx;
- memset(&dsa, 0, sizeof(dsa));
+ sctx->db_render_state.dirty = true;
+}
+
+static void si_emit_db_render_state(struct si_context *sctx, struct r600_atom *state)
+{
+ struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
- state = sctx->b.b.create_depth_stencil_alpha_state(&sctx->b.b, &dsa);
- if (copy_depth || copy_stencil) {
- si_pm4_set_reg(&state->pm4, R_028000_DB_RENDER_CONTROL,
- S_028000_DEPTH_COPY(copy_depth) |
- S_028000_STENCIL_COPY(copy_stencil) |
- S_028000_COPY_CENTROID(1) |
- S_028000_COPY_SAMPLE(sample));
+ r600_write_context_reg_seq(cs, R_028000_DB_RENDER_CONTROL, 2);
+
+ /* DB_RENDER_CONTROL */
+ if (sctx->dbcb_depth_copy_enabled ||
+ sctx->dbcb_stencil_copy_enabled) {
+ radeon_emit(cs,
+ S_028000_DEPTH_COPY(sctx->dbcb_depth_copy_enabled) |
+ S_028000_STENCIL_COPY(sctx->dbcb_stencil_copy_enabled) |
+ S_028000_COPY_CENTROID(1) |
+ S_028000_COPY_SAMPLE(sctx->dbcb_copy_sample));
+ } else if (sctx->db_inplace_flush_enabled) {
+ radeon_emit(cs,
+ S_028000_DEPTH_COMPRESS_DISABLE(1) |
+ S_028000_STENCIL_COMPRESS_DISABLE(1));
+ } else if (sctx->db_depth_clear) {
+ radeon_emit(cs, S_028000_DEPTH_CLEAR_ENABLE(1));
} else {
- si_pm4_set_reg(&state->pm4, R_028000_DB_RENDER_CONTROL,
- S_028000_DEPTH_COMPRESS_DISABLE(1) |
- S_028000_STENCIL_COMPRESS_DISABLE(1));
+ radeon_emit(cs, 0);
}
- return state;
+ /* DB_COUNT_CONTROL (occlusion queries) */
+ if (sctx->b.num_occlusion_queries > 0) {
+ if (sctx->b.chip_class >= CIK) {
+ radeon_emit(cs,
+ S_028004_PERFECT_ZPASS_COUNTS(1) |
+ S_028004_SAMPLE_RATE(sctx->framebuffer.log_samples) |
+ S_028004_ZPASS_ENABLE(1) |
+ S_028004_SLICE_EVEN_ENABLE(1) |
+ S_028004_SLICE_ODD_ENABLE(1));
+ } else {
+ radeon_emit(cs,
+ S_028004_PERFECT_ZPASS_COUNTS(1) |
+ S_028004_SAMPLE_RATE(sctx->framebuffer.log_samples));
+ }
+ } else {
+ /* Disable occlusion queries. */
+ if (sctx->b.chip_class >= CIK) {
+ radeon_emit(cs, 0);
+ } else {
+ radeon_emit(cs, S_028004_ZPASS_INCREMENT_DISABLE(1));
+ }
+ }
+
+ /* DB_RENDER_OVERRIDE2 */
+ if (sctx->db_depth_disable_expclear) {
+ r600_write_context_reg(cs, R_028010_DB_RENDER_OVERRIDE2,
+ S_028010_DISABLE_ZMASK_EXPCLEAR_OPTIMIZATION(1));
+ } else {
+ r600_write_context_reg(cs, R_028010_DB_RENDER_OVERRIDE2, 0);
+ }
+
+ r600_write_context_reg(cs, R_02880C_DB_SHADER_CONTROL,
+ S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z) |
+ S_02880C_ALPHA_TO_MASK_DISABLE(sctx->framebuffer.cb0_is_integer) |
+ sctx->ps_db_shader_control);
}
/*
}
}
+ if (desc->layout == UTIL_FORMAT_LAYOUT_BPTC) {
+ if (!enable_s3tc)
+ goto out_unknown;
+
+ switch (format) {
+ case PIPE_FORMAT_BPTC_RGBA_UNORM:
+ case PIPE_FORMAT_BPTC_SRGBA:
+ return V_008F14_IMG_DATA_FORMAT_BC7;
+ case PIPE_FORMAT_BPTC_RGB_FLOAT:
+ case PIPE_FORMAT_BPTC_RGB_UFLOAT:
+ return V_008F14_IMG_DATA_FORMAT_BC6;
+ default:
+ goto out_unknown;
+ }
+ }
+
+ if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) {
+ switch (format) {
+ case PIPE_FORMAT_R8G8_B8G8_UNORM:
+ case PIPE_FORMAT_G8R8_B8R8_UNORM:
+ return V_008F14_IMG_DATA_FORMAT_GB_GR;
+ case PIPE_FORMAT_G8R8_G8B8_UNORM:
+ case PIPE_FORMAT_R8G8_R8B8_UNORM:
+ return V_008F14_IMG_DATA_FORMAT_BG_RG;
+ default:
+ goto out_unknown;
+ }
+ }
+
if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
if (!enable_s3tc)
case PIPE_TEXTURE_3D:
return V_008F1C_SQ_RSRC_IMG_3D;
case PIPE_TEXTURE_CUBE:
+ case PIPE_TEXTURE_CUBE_ARRAY:
return V_008F1C_SQ_RSRC_IMG_CUBE;
}
}
if (type == UTIL_FORMAT_TYPE_FIXED)
return V_008F0C_BUF_DATA_FORMAT_INVALID;
+ if (desc->format == PIPE_FORMAT_R11G11B10_FLOAT)
+ return V_008F0C_BUF_DATA_FORMAT_10_11_11;
+
if (desc->nr_channels == 4 &&
desc->channel[0].size == 10 &&
desc->channel[1].size == 10 &&
const struct util_format_description *desc,
int first_non_void)
{
+ if (desc->format == PIPE_FORMAT_R11G11B10_FLOAT)
+ return V_008F0C_BUF_NUM_FORMAT_FLOAT;
+
switch (desc->channel[first_non_void].type) {
case UTIL_FORMAT_TYPE_SIGNED:
if (desc->channel[first_non_void].normalized)
return FALSE;
if (sample_count > 1) {
- if (HAVE_LLVM < 0x0304)
- return FALSE;
-
/* 2D tiling on CIK is supported since DRM 2.35.0 */
if (sscreen->b.chip_class >= CIK && sscreen->b.info.drm_minor < 35)
return FALSE;
if ((usage & (PIPE_BIND_RENDER_TARGET |
PIPE_BIND_DISPLAY_TARGET |
PIPE_BIND_SCANOUT |
- PIPE_BIND_SHARED)) &&
+ PIPE_BIND_SHARED |
+ PIPE_BIND_BLENDABLE)) &&
si_is_colorbuffer_format_supported(format)) {
retval |= usage &
(PIPE_BIND_RENDER_TARGET |
PIPE_BIND_DISPLAY_TARGET |
PIPE_BIND_SCANOUT |
PIPE_BIND_SHARED);
+ if (!util_format_is_pure_integer(format) &&
+ !util_format_is_depth_or_stencil(format))
+ retval |= usage & PIPE_BIND_BLENDABLE;
}
if ((usage & PIPE_BIND_DEPTH_STENCIL) &&
}
}
- offset += r600_resource_va(sctx->b.b.screen, surf->base.texture);
+ offset += rtex->resource.gpu_address;
surf->cb_color_base = offset >> 8;
surf->cb_color_pitch = color_pitch;
struct si_screen *sscreen = sctx->screen;
struct r600_texture *rtex = (struct r600_texture*)surf->base.texture;
unsigned level = surf->base.u.tex.level;
- unsigned pitch, slice, format, tile_mode_index, array_mode;
+ struct radeon_surface_level *levelinfo = &rtex->surface.level[level];
+ unsigned format, tile_mode_index, array_mode;
unsigned macro_aspect, tile_split, stile_split, bankh, bankw, nbanks, pipe_config;
uint32_t z_info, s_info, db_depth_info;
uint64_t z_offs, s_offs;
}
assert(format != V_028040_Z_INVALID);
- s_offs = z_offs = r600_resource_va(sctx->b.b.screen, surf->base.texture);
+ s_offs = z_offs = rtex->resource.gpu_address;
z_offs += rtex->surface.level[level].offset;
s_offs += rtex->surface.stencil_level[level].offset;
- pitch = (rtex->surface.level[level].nblk_x / 8) - 1;
- slice = (rtex->surface.level[level].nblk_x * rtex->surface.level[level].nblk_y) / 64;
- if (slice) {
- slice = slice - 1;
- }
-
db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(1);
z_info = S_028040_FORMAT(format);
macro_aspect = cik_macro_tile_aspect(macro_aspect);
bankw = cik_bank_wh(bankw);
bankh = cik_bank_wh(bankh);
- nbanks = cik_num_banks(sscreen, rtex->surface.bpe, rtex->surface.tile_split);
+ nbanks = si_num_banks(sscreen, rtex);
tile_mode_index = si_tile_mode_index(rtex, level, false);
pipe_config = cik_db_pipe_config(sscreen, tile_mode_index);
/* HiZ aka depth buffer htile */
/* use htile only for first level */
if (rtex->htile_buffer && !level) {
- const struct util_format_description *fmt_desc;
-
- z_info |= S_028040_TILE_SURFACE_ENABLE(1);
+ z_info |= S_028040_TILE_SURFACE_ENABLE(1) |
+ S_028040_ALLOW_EXPCLEAR(1);
/* This is optimal for the clear value of 1.0 and using
* the LESS and LEQUAL test functions. Set this to 0
* clearing. */
z_info |= S_028040_ZRANGE_PRECISION(1);
- fmt_desc = util_format_description(rtex->resource.b.b.format);
- if (!util_format_has_stencil(fmt_desc)) {
- /* Use all of the htile_buffer for depth */
- s_info |= S_028044_TILE_STENCIL_DISABLE(1);
- }
+ /* Use all of the htile_buffer for depth, because we don't
+ * use HTILE for stencil because of FAST_STENCIL_DISABLE. */
+ s_info |= S_028044_TILE_STENCIL_DISABLE(1);
- uint64_t va = r600_resource_va(&sctx->screen->b.b, &rtex->htile_buffer->b.b);
+ uint64_t va = rtex->htile_buffer->gpu_address;
db_htile_data_base = va >> 8;
db_htile_surface = S_028ABC_FULL_CACHE(1);
} else {
db_htile_surface = 0;
}
+ assert(levelinfo->nblk_x % 8 == 0 && levelinfo->nblk_y % 8 == 0);
+
surf->db_depth_view = S_028008_SLICE_START(surf->base.u.tex.first_layer) |
S_028008_SLICE_MAX(surf->base.u.tex.last_layer);
surf->db_htile_data_base = db_htile_data_base;
surf->db_stencil_info = s_info;
surf->db_depth_base = z_offs >> 8;
surf->db_stencil_base = s_offs >> 8;
- surf->db_depth_size = S_028058_PITCH_TILE_MAX(pitch);
- surf->db_depth_slice = S_02805C_SLICE_TILE_MAX(slice);
+ surf->db_depth_size = S_028058_PITCH_TILE_MAX((levelinfo->nblk_x / 8) - 1) |
+ S_028058_HEIGHT_TILE_MAX((levelinfo->nblk_y / 8) - 1);
+ surf->db_depth_slice = S_02805C_SLICE_TILE_MAX((levelinfo->nblk_x *
+ levelinfo->nblk_y) / 64 - 1);
surf->db_htile_surface = db_htile_surface;
surf->pa_su_poly_offset_db_fmt_cntl = pa_su_poly_offset_db_fmt_cntl;
const struct pipe_framebuffer_state *state)
{
struct si_context *sctx = (struct si_context *)ctx;
+ struct pipe_constant_buffer constbuf = {0};
struct r600_surface *surf = NULL;
struct r600_texture *rtex;
+ bool old_cb0_is_integer = sctx->framebuffer.cb0_is_integer;
int i;
if (sctx->framebuffer.state.nr_cbufs) {
sctx->framebuffer.cb0_is_integer = state->nr_cbufs && state->cbufs[0] &&
util_format_is_pure_integer(state->cbufs[0]->format);
+ if (sctx->framebuffer.cb0_is_integer != old_cb0_is_integer)
+ sctx->db_render_state.dirty = true;
+
for (i = 0; i < state->nr_cbufs; i++) {
if (!state->cbufs[i])
continue;
si_update_fb_blend_state(sctx);
sctx->framebuffer.atom.num_dw = state->nr_cbufs*15 + (8 - state->nr_cbufs)*3;
- sctx->framebuffer.atom.num_dw += state->zsbuf ? 23 : 4;
+ sctx->framebuffer.atom.num_dw += state->zsbuf ? 26 : 4;
sctx->framebuffer.atom.num_dw += 3; /* WINDOW_SCISSOR_BR */
- sctx->framebuffer.atom.num_dw += 25; /* MSAA */
+ sctx->framebuffer.atom.num_dw += 18; /* MSAA sample locations */
sctx->framebuffer.atom.dirty = true;
+ sctx->msaa_config.dirty = true;
+
+ /* Set sample locations as fragment shader constants. */
+ switch (sctx->framebuffer.nr_samples) {
+ case 1:
+ constbuf.user_buffer = sctx->b.sample_locations_1x;
+ break;
+ case 2:
+ constbuf.user_buffer = sctx->b.sample_locations_2x;
+ break;
+ case 4:
+ constbuf.user_buffer = sctx->b.sample_locations_4x;
+ break;
+ case 8:
+ constbuf.user_buffer = sctx->b.sample_locations_8x;
+ break;
+ case 16:
+ constbuf.user_buffer = sctx->b.sample_locations_16x;
+ break;
+ default:
+ assert(0);
+ }
+ constbuf.buffer_size = sctx->framebuffer.nr_samples * 2 * 4;
+ ctx->set_constant_buffer(ctx, PIPE_SHADER_FRAGMENT,
+ SI_DRIVER_STATE_CONST_BUF, &constbuf);
}
static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom *atom)
radeon_emit(cs, zb->db_depth_slice); /* R_02805C_DB_DEPTH_SLICE */
r600_write_context_reg(cs, R_028ABC_DB_HTILE_SURFACE, zb->db_htile_surface);
+ r600_write_context_reg(cs, R_02802C_DB_DEPTH_CLEAR, fui(rtex->depth_clear_value));
r600_write_context_reg(cs, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL,
zb->pa_su_poly_offset_db_fmt_cntl);
} else {
r600_write_context_reg(cs, R_028208_PA_SC_WINDOW_SCISSOR_BR,
S_028208_BR_X(state->width) | S_028208_BR_Y(state->height));
- cayman_emit_msaa_state(cs, sctx->framebuffer.nr_samples);
+ cayman_emit_msaa_sample_locs(cs, sctx->framebuffer.nr_samples);
+}
+
+static void si_emit_msaa_config(struct r600_common_context *rctx, struct r600_atom *atom)
+{
+ struct si_context *sctx = (struct si_context *)rctx;
+ struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
+
+ cayman_emit_msaa_config(cs, sctx->framebuffer.nr_samples,
+ sctx->ps_iter_samples);
+}
+
+const struct r600_atom si_atom_msaa_config = { si_emit_msaa_config, 10 }; /* number of CS dwords */
+
+static void si_set_min_samples(struct pipe_context *ctx, unsigned min_samples)
+{
+ struct si_context *sctx = (struct si_context *)ctx;
+
+ if (sctx->ps_iter_samples == min_samples)
+ return;
+
+ sctx->ps_iter_samples = min_samples;
+
+ if (sctx->framebuffer.nr_samples > 1)
+ sctx->msaa_config.dirty = true;
}
/*
if (sctx->queued.named.rasterizer) {
key->ps.color_two_side = sctx->queued.named.rasterizer->two_side;
key->ps.flatshade = sctx->queued.named.rasterizer->flatshade;
+ key->ps.interp_at_sample = sctx->framebuffer.nr_samples > 1 &&
+ sctx->ps_iter_samples == sctx->framebuffer.nr_samples;
if (sctx->queued.named.blend) {
key->ps.alpha_to_one = sctx->queued.named.blend->alpha_to_one &&
return si_create_shader_state(ctx, state, PIPE_SHADER_FRAGMENT);
}
-#if HAVE_LLVM >= 0x0305
-
static void *si_create_gs_state(struct pipe_context *ctx,
const struct pipe_shader_state *state)
{
return si_create_shader_state(ctx, state, PIPE_SHADER_GEOMETRY);
}
-#endif
-
static void *si_create_vs_state(struct pipe_context *ctx,
const struct pipe_shader_state *state)
{
sctx->vs_shader = sel;
}
-#if HAVE_LLVM >= 0x0305
-
static void si_bind_gs_shader(struct pipe_context *ctx, void *state)
{
struct si_context *sctx = (struct si_context *)ctx;
sctx->gs_shader = sel;
}
-#endif
-
static void si_bind_ps_shader(struct pipe_context *ctx, void *state)
{
struct si_context *sctx = (struct si_context *)ctx;
return;
/* use dummy shader if supplied shader is corrupt */
- if (!sel || !sel->current)
+ if (!sel || !sel->current) {
+ if (!sctx->dummy_pixel_shader) {
+ sctx->dummy_pixel_shader =
+ util_make_fragment_cloneinput_shader(&sctx->b.b, 0,
+ TGSI_SEMANTIC_GENERIC,
+ TGSI_INTERPOLATE_CONSTANT);
+ }
+
sel = sctx->dummy_pixel_shader;
+ }
sctx->ps_shader = sel;
}
si_delete_shader_selector(ctx, sel);
}
-#if HAVE_LLVM >= 0x0305
-
static void si_delete_gs_shader(struct pipe_context *ctx, void *state)
{
struct si_context *sctx = (struct si_context *)ctx;
si_delete_shader_selector(ctx, sel);
}
-#endif
-
static void si_delete_ps_shader(struct pipe_context *ctx, void *state)
{
struct si_context *sctx = (struct si_context *)ctx;
struct pipe_resource *texture,
const struct pipe_sampler_view *state)
{
+ struct si_context *sctx = (struct si_context*)ctx;
struct si_pipe_sampler_view *view = CALLOC_STRUCT(si_pipe_sampler_view);
struct r600_texture *tmp = (struct r600_texture*)texture;
const struct util_format_description *desc;
desc = util_format_description(state->format);
first_non_void = util_format_get_first_non_void_channel(state->format);
stride = desc->block.bits / 8;
- va = r600_resource_va(ctx->screen, texture) + state->u.buf.first_element*stride;
+ va = tmp->resource.gpu_address + state->u.buf.first_element*stride;
format = si_translate_buffer_dataformat(ctx->screen, desc, first_non_void);
num_format = si_translate_buffer_numformat(ctx->screen, desc, first_non_void);
S_008F0C_DST_SEL_W(si_map_swizzle(desc->swizzle[3])) |
S_008F0C_NUM_FORMAT(num_format) |
S_008F0C_DATA_FORMAT(format);
+
+ LIST_ADDTAIL(&view->list, &sctx->b.texture_buffers);
return &view->base;
}
case PIPE_FORMAT_DXT1_SRGBA:
case PIPE_FORMAT_DXT3_SRGBA:
case PIPE_FORMAT_DXT5_SRGBA:
+ case PIPE_FORMAT_BPTC_SRGBA:
num_format = V_008F14_IMG_NUM_FORMAT_SRGB;
break;
case PIPE_FORMAT_RGTC1_SNORM:
case PIPE_FORMAT_LATC1_SNORM:
case PIPE_FORMAT_RGTC2_SNORM:
case PIPE_FORMAT_LATC2_SNORM:
+ /* implies float, so use SNORM/UNORM to determine
+ whether data is signed or not */
+ case PIPE_FORMAT_BPTC_RGB_FLOAT:
num_format = V_008F14_IMG_NUM_FORMAT_SNORM;
break;
default:
num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
break;
}
+ } else if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) {
+ num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
} else {
num_format = V_008F14_IMG_NUM_FORMAT_FLOAT;
}
depth = texture->array_size;
} else if (texture->target == PIPE_TEXTURE_2D_ARRAY) {
depth = texture->array_size;
- }
+ } else if (texture->target == PIPE_TEXTURE_CUBE_ARRAY)
+ depth = texture->array_size / 6;
- va = r600_resource_va(ctx->screen, texture);
- va += surflevel[0].offset;
+ va = tmp->resource.gpu_address + surflevel[0].offset;
va += tmp->mipmap_shift * surflevel[texture->last_level].slice_size * tmp->surface.array_size;
view->state[0] = va >> 8;
/* Initialize the sampler view for FMASK. */
if (tmp->fmask.size) {
- uint64_t va = r600_resource_va(ctx->screen, texture) + tmp->fmask.offset;
+ uint64_t va = tmp->resource.gpu_address + tmp->fmask.offset;
uint32_t fmask_format;
switch (texture->nr_samples) {
static void si_sampler_view_destroy(struct pipe_context *ctx,
struct pipe_sampler_view *state)
{
- struct r600_pipe_sampler_view *resource = (struct r600_pipe_sampler_view *)state;
+ struct si_pipe_sampler_view *view = (struct si_pipe_sampler_view *)state;
+
+ if (view->resource->b.b.target == PIPE_BUFFER)
+ LIST_DELINIT(&view->list);
pipe_resource_reference(&state->texture, NULL);
- FREE(resource);
+ FREE(view);
}
static bool wrap_mode_uses_border_color(unsigned wrap, bool linear_filter)
rstate->val[0] = (S_008F30_CLAMP_X(si_tex_wrap(state->wrap_s)) |
S_008F30_CLAMP_Y(si_tex_wrap(state->wrap_t)) |
S_008F30_CLAMP_Z(si_tex_wrap(state->wrap_r)) |
- (state->max_anisotropy & 0x7) << 9 | /* XXX */
+ r600_tex_aniso_filter(state->max_anisotropy) << 9 |
S_008F30_DEPTH_COMPARE_FUNC(si_tex_compare(state->compare_func)) |
S_008F30_FORCE_UNNORMALIZED(!state->normalized_coords) |
- aniso_flag_offset << 16 | /* XXX */
S_008F30_DISABLE_CUBE_WRAP(!state->seamless_cube_map));
rstate->val[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 8)) |
S_008F34_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 8)));
rstate->val[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 8)) |
- S_008F38_XY_MAG_FILTER(si_tex_filter(state->mag_img_filter)) |
- S_008F38_XY_MIN_FILTER(si_tex_filter(state->min_img_filter)) |
+ S_008F38_XY_MAG_FILTER(si_tex_filter(state->mag_img_filter) | aniso_flag_offset) |
+ S_008F38_XY_MIN_FILTER(si_tex_filter(state->min_img_filter) | aniso_flag_offset) |
S_008F38_MIP_FILTER(si_tex_mipfilter(state->min_mip_filter)));
rstate->val[3] = S_008F3C_BORDER_COLOR_TYPE(border_color_type);
return rstate;
}
-/* XXX consider moving this function to si_descriptors.c for gcc to inline
- * the si_set_sampler_view calls. LTO might help too. */
-static void si_set_sampler_views(struct pipe_context *ctx,
- unsigned shader, unsigned start,
- unsigned count,
- struct pipe_sampler_view **views)
-{
- struct si_context *sctx = (struct si_context *)ctx;
- struct si_textures_info *samplers = &sctx->samplers[shader];
- struct si_pipe_sampler_view **rviews = (struct si_pipe_sampler_view **)views;
- int i;
-
- if (shader >= SI_NUM_SHADERS)
- return;
-
- assert(start == 0);
-
- for (i = 0; i < count; i++) {
- if (!views[i]) {
- samplers->depth_texture_mask &= ~(1 << i);
- samplers->compressed_colortex_mask &= ~(1 << i);
- si_set_sampler_view(sctx, shader, i, NULL, NULL);
- si_set_sampler_view(sctx, shader, FMASK_TEX_OFFSET + i,
- NULL, NULL);
- continue;
- }
-
- si_set_sampler_view(sctx, shader, i, views[i], rviews[i]->state);
-
- if (views[i]->texture->target != PIPE_BUFFER) {
- struct r600_texture *rtex =
- (struct r600_texture*)views[i]->texture;
-
- if (rtex->is_depth && !rtex->is_flushing_texture) {
- samplers->depth_texture_mask |= 1 << i;
- } else {
- samplers->depth_texture_mask &= ~(1 << i);
- }
- if (rtex->cmask.size || rtex->fmask.size) {
- samplers->compressed_colortex_mask |= 1 << i;
- } else {
- samplers->compressed_colortex_mask &= ~(1 << i);
- }
-
- if (rtex->fmask.size) {
- si_set_sampler_view(sctx, shader, FMASK_TEX_OFFSET + i,
- views[i], rviews[i]->fmask_state);
- } else {
- si_set_sampler_view(sctx, shader, FMASK_TEX_OFFSET + i,
- NULL, NULL);
- }
- }
- }
- for (; i < samplers->n_views; i++) {
- samplers->depth_texture_mask &= ~(1 << i);
- samplers->compressed_colortex_mask &= ~(1 << i);
- si_set_sampler_view(sctx, shader, i, NULL, NULL);
- si_set_sampler_view(sctx, shader, FMASK_TEX_OFFSET + i,
- NULL, NULL);
- }
-
- samplers->n_views = count;
- sctx->b.flags |= R600_CONTEXT_INV_TEX_CACHE;
-}
-
-static void si_set_sampler_states(struct si_context *sctx,
- struct si_pm4_state *pm4,
- unsigned count, void **states,
- struct si_textures_info *samplers,
- unsigned user_data_reg)
+/* Upload border colors and update the pointers in resource descriptors.
+ * There can only be 4096 border colors per context.
+ *
+ * XXX: This is broken if the buffer gets reallocated.
+ */
+static void si_set_border_colors(struct si_context *sctx, unsigned count,
+ void **states)
{
struct si_pipe_sampler_state **rstates = (struct si_pipe_sampler_state **)states;
uint32_t *border_color_table = NULL;
int i, j;
- if (!count)
- goto out;
-
- sctx->b.flags |= R600_CONTEXT_INV_TEX_CACHE;
-
- si_pm4_sh_data_begin(pm4);
for (i = 0; i < count; i++) {
if (rstates[i] &&
G_008F3C_BORDER_COLOR_TYPE(rstates[i]->val[3]) ==
sctx->border_color_table =
si_resource_create_custom(&sctx->screen->b.b,
- PIPE_USAGE_STAGING,
+ PIPE_USAGE_DYNAMIC,
4096 * 4 * 4);
}
rstates[i]->val[3] &= C_008F3C_BORDER_COLOR_PTR;
rstates[i]->val[3] |= S_008F3C_BORDER_COLOR_PTR(sctx->border_color_offset++);
}
-
- for (j = 0; j < Elements(rstates[i]->val); ++j) {
- si_pm4_sh_data_add(pm4, rstates[i] ? rstates[i]->val[j] : 0);
- }
}
- si_pm4_sh_data_end(pm4, user_data_reg, SI_SGPR_SAMPLER);
if (border_color_table) {
- uint64_t va_offset =
- r600_resource_va(&sctx->screen->b.b,
- (void*)sctx->border_color_table);
+ struct si_pm4_state *pm4 = si_pm4_alloc_state(sctx);
+
+ uint64_t va_offset = sctx->border_color_table->gpu_address;
si_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, va_offset >> 8);
if (sctx->b.chip_class >= CIK)
si_pm4_set_reg(pm4, R_028084_TA_BC_BASE_ADDR_HI, va_offset >> 40);
- sctx->b.ws->buffer_unmap(sctx->border_color_table->cs_buf);
si_pm4_add_bo(pm4, sctx->border_color_table, RADEON_USAGE_READ,
RADEON_PRIO_SHADER_DATA);
+ si_pm4_set_state(sctx, ta_bordercolor_base, pm4);
}
-
- memcpy(samplers->samplers, states, sizeof(void*) * count);
-
-out:
- samplers->n_samplers = count;
}
-static void si_bind_vs_sampler_states(struct pipe_context *ctx, unsigned count, void **states)
-{
- struct si_context *sctx = (struct si_context *)ctx;
- struct si_pm4_state *pm4 = si_pm4_alloc_state(sctx);
-
- si_set_sampler_states(sctx, pm4, count, states,
- &sctx->samplers[PIPE_SHADER_VERTEX],
- R_00B130_SPI_SHADER_USER_DATA_VS_0);
-#if HAVE_LLVM >= 0x0305
- si_set_sampler_states(sctx, pm4, count, states,
- &sctx->samplers[PIPE_SHADER_VERTEX],
- R_00B330_SPI_SHADER_USER_DATA_ES_0);
-#endif
- si_pm4_set_state(sctx, vs_sampler, pm4);
-}
-
-static void si_bind_gs_sampler_states(struct pipe_context *ctx, unsigned count, void **states)
-{
- struct si_context *sctx = (struct si_context *)ctx;
- struct si_pm4_state *pm4 = si_pm4_alloc_state(sctx);
-
- si_set_sampler_states(sctx, pm4, count, states,
- &sctx->samplers[PIPE_SHADER_GEOMETRY],
- R_00B230_SPI_SHADER_USER_DATA_GS_0);
- si_pm4_set_state(sctx, gs_sampler, pm4);
-}
-
-static void si_bind_ps_sampler_states(struct pipe_context *ctx, unsigned count, void **states)
-{
- struct si_context *sctx = (struct si_context *)ctx;
- struct si_pm4_state *pm4 = si_pm4_alloc_state(sctx);
-
- si_set_sampler_states(sctx, pm4, count, states,
- &sctx->samplers[PIPE_SHADER_FRAGMENT],
- R_00B030_SPI_SHADER_USER_DATA_PS_0);
- si_pm4_set_state(sctx, ps_sampler, pm4);
-}
-
-
static void si_bind_sampler_states(struct pipe_context *ctx, unsigned shader,
unsigned start, unsigned count,
void **states)
{
- assert(start == 0);
-
- switch (shader) {
- case PIPE_SHADER_VERTEX:
- si_bind_vs_sampler_states(ctx, count, states);
- break;
- case PIPE_SHADER_GEOMETRY:
- si_bind_gs_sampler_states(ctx, count, states);
- break;
- case PIPE_SHADER_FRAGMENT:
- si_bind_ps_sampler_states(ctx, count, states);
- break;
- default:
- ;
- }
-}
+ struct si_context *sctx = (struct si_context *)ctx;
+ if (!count || shader >= SI_NUM_SHADERS)
+ return;
+ si_set_border_colors(sctx, count, states);
+ si_set_sampler_descriptors(sctx, shader, start, count, states);
+}
static void si_set_sample_mask(struct pipe_context *ctx, unsigned sample_mask)
{
struct si_context *sctx = (struct si_context *)ctx;
- struct si_pm4_state *pm4 = si_pm4_alloc_state(sctx);
+ struct si_state_sample_mask *state = CALLOC_STRUCT(si_state_sample_mask);
+ struct si_pm4_state *pm4 = &state->pm4;
uint16_t mask = sample_mask;
- if (pm4 == NULL)
+ if (state == NULL)
return;
+ state->sample_mask = mask;
si_pm4_set_reg(pm4, R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, mask | (mask << 16));
si_pm4_set_reg(pm4, R_028C3C_PA_SC_AA_MASK_X0Y1_X1Y1, mask | (mask << 16));
- si_pm4_set_state(sctx, sample_mask, pm4);
+ si_pm4_set_state(sctx, sample_mask, state);
}
static void si_delete_sampler_state(struct pipe_context *ctx, void *state)
S_008F0C_DST_SEL_W(si_map_swizzle(desc->swizzle[3])) |
S_008F0C_NUM_FORMAT(num_format) |
S_008F0C_DATA_FORMAT(data_format);
+ v->format_size[i] = desc->block.bits / 8;
}
memcpy(v->elements, elements, sizeof(struct pipe_vertex_element) * count);
struct si_vertex_element *v = (struct si_vertex_element*)state;
sctx->vertex_elements = v;
+ sctx->vertex_buffers_dirty = true;
}
static void si_delete_vertex_element(struct pipe_context *ctx, void *state)
FREE(state);
}
-static void si_set_vertex_buffers(struct pipe_context *ctx, unsigned start_slot, unsigned count,
+static void si_set_vertex_buffers(struct pipe_context *ctx,
+ unsigned start_slot, unsigned count,
const struct pipe_vertex_buffer *buffers)
{
struct si_context *sctx = (struct si_context *)ctx;
+ struct pipe_vertex_buffer *dst = sctx->vertex_buffer + start_slot;
+ int i;
+
+ assert(start_slot + count <= Elements(sctx->vertex_buffer));
- util_set_vertex_buffers_count(sctx->vertex_buffer, &sctx->nr_vertex_buffers, buffers, start_slot, count);
+ if (buffers) {
+ for (i = 0; i < count; i++) {
+ const struct pipe_vertex_buffer *src = buffers + i;
+ struct pipe_vertex_buffer *dsti = dst + i;
+
+ pipe_resource_reference(&dsti->buffer, src->buffer);
+ dsti->buffer_offset = src->buffer_offset;
+ dsti->stride = src->stride;
+ }
+ } else {
+ for (i = 0; i < count; i++) {
+ pipe_resource_reference(&dst[i].buffer, NULL);
+ }
+ }
+ sctx->vertex_buffers_dirty = true;
}
static void si_set_index_buffer(struct pipe_context *ctx,
return si_create_blend_state_mode(&sctx->b.b, &blend, mode);
}
-static void si_set_occlusion_query_state(struct pipe_context *ctx, bool enable)
-{
- /* XXX Turn this into a proper state. Right now the queries are
- * enabled in draw_vbo, which snoops r600_common_context to see
- * if any occlusion queries are active. */
-}
-
static void si_need_gfx_cs_space(struct pipe_context *ctx, unsigned num_dw,
bool include_draw_vbo)
{
void si_init_state_functions(struct si_context *sctx)
{
- int i;
-
- si_init_atom(&sctx->framebuffer.atom, &sctx->atoms.framebuffer, si_emit_framebuffer_state, 0);
+ si_init_atom(&sctx->framebuffer.atom, &sctx->atoms.s.framebuffer, si_emit_framebuffer_state, 0);
+ si_init_atom(&sctx->db_render_state, &sctx->atoms.s.db_render_state, si_emit_db_render_state, 10);
sctx->b.b.create_blend_state = si_create_blend_state;
sctx->b.b.bind_blend_state = si_bind_blend_state;
sctx->b.b.bind_depth_stencil_alpha_state = si_bind_dsa_state;
sctx->b.b.delete_depth_stencil_alpha_state = si_delete_dsa_state;
- for (i = 0; i < 8; i++) {
- sctx->custom_dsa_flush_depth_stencil[i] = si_create_db_flush_dsa(sctx, true, true, i);
- sctx->custom_dsa_flush_depth[i] = si_create_db_flush_dsa(sctx, true, false, i);
- sctx->custom_dsa_flush_stencil[i] = si_create_db_flush_dsa(sctx, false, true, i);
- }
- sctx->custom_dsa_flush_inplace = si_create_db_flush_dsa(sctx, false, false, 0);
+ sctx->custom_dsa_flush = si_create_db_flush_dsa(sctx);
sctx->custom_blend_resolve = si_create_blend_custom(sctx, V_028808_CB_RESOLVE);
sctx->custom_blend_decompress = si_create_blend_custom(sctx, V_028808_CB_FMASK_DECOMPRESS);
sctx->custom_blend_fastclear = si_create_blend_custom(sctx, V_028808_CB_ELIMINATE_FAST_CLEAR);
sctx->b.b.bind_fs_state = si_bind_ps_shader;
sctx->b.b.delete_vs_state = si_delete_vs_shader;
sctx->b.b.delete_fs_state = si_delete_ps_shader;
-#if HAVE_LLVM >= 0x0305
+
sctx->b.b.create_gs_state = si_create_gs_state;
sctx->b.b.bind_gs_state = si_bind_gs_shader;
sctx->b.b.delete_gs_state = si_delete_gs_shader;
-#endif
sctx->b.b.create_sampler_state = si_create_sampler_state;
sctx->b.b.bind_sampler_states = si_bind_sampler_states;
sctx->b.b.delete_sampler_state = si_delete_sampler_state;
sctx->b.b.create_sampler_view = si_create_sampler_view;
- sctx->b.b.set_sampler_views = si_set_sampler_views;
sctx->b.b.sampler_view_destroy = si_sampler_view_destroy;
sctx->b.b.set_sample_mask = si_set_sample_mask;
sctx->b.b.texture_barrier = si_texture_barrier;
sctx->b.b.set_polygon_stipple = si_set_polygon_stipple;
+ sctx->b.b.set_min_samples = si_set_min_samples;
+
sctx->b.dma_copy = si_dma_copy;
sctx->b.set_occlusion_query_state = si_set_occlusion_query_state;
sctx->b.need_gfx_cs_space = si_need_gfx_cs_space;
si_cmd_context_control(pm4);
- si_pm4_set_reg(pm4, R_028A4C_PA_SC_MODE_CNTL_1, 0x0);
-
si_pm4_set_reg(pm4, R_028A10_VGT_OUTPUT_PATH_CNTL, 0x0);
si_pm4_set_reg(pm4, R_028A14_VGT_HOS_CNTL, 0x0);
si_pm4_set_reg(pm4, R_028A18_VGT_HOS_MAX_TESS_LEVEL, 0x0);
si_pm4_set_reg(pm4, R_028B90_VGT_GS_INSTANCE_CNT, 0);
si_pm4_set_reg(pm4, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0);
- if (sctx->b.chip_class == SI) {
- si_pm4_set_reg(pm4, R_028AA8_IA_MULTI_VGT_PARAM,
- S_028AA8_SWITCH_ON_EOP(1) |
- S_028AA8_PARTIAL_VS_WAVE_ON(1) |
- S_028AA8_PRIMGROUP_SIZE(63));
- }
si_pm4_set_reg(pm4, R_028AB4_VGT_REUSE_OFF, 0x00000000);
si_pm4_set_reg(pm4, R_028AB8_VGT_VTX_CNT_EN, 0x0);
if (sctx->b.chip_class < CIK)
/* XXX todo */
case CHIP_KABINI:
/* XXX todo */
+ case CHIP_MULLINS:
+ /* XXX todo */
default:
si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x00000000);
si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, 0x00000000);
si_pm4_set_reg(pm4, R_028020_DB_DEPTH_BOUNDS_MIN, 0x00000000);
si_pm4_set_reg(pm4, R_028024_DB_DEPTH_BOUNDS_MAX, 0x00000000);
si_pm4_set_reg(pm4, R_028028_DB_STENCIL_CLEAR, 0x00000000);
- si_pm4_set_reg(pm4, R_02802C_DB_DEPTH_CLEAR, 0x3F800000);
si_pm4_set_reg(pm4, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0);
si_pm4_set_reg(pm4, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0);
si_pm4_set_reg(pm4, R_028AC8_DB_PRELOAD_CONTROL, 0x0);
+
+ /* There is a hang if stencil is used and fast stencil is enabled
+ * regardless of whether HTILE is depth-only or not.
+ */
si_pm4_set_reg(pm4, R_02800C_DB_RENDER_OVERRIDE,
S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) |
- S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE));
+ S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE) |
+ S_02800C_FAST_STENCIL_DISABLE(1));
+
si_pm4_set_reg(pm4, R_028400_VGT_MAX_VTX_INDX, ~0);
si_pm4_set_reg(pm4, R_028404_VGT_MIN_VTX_INDX, 0);
+ si_pm4_set_reg(pm4, R_028408_VGT_INDX_OFFSET, 0);
if (sctx->b.chip_class >= CIK) {
si_pm4_set_reg(pm4, R_00B118_SPI_SHADER_PGM_RSRC3_VS, S_00B118_CU_EN(0xffff));