X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fradeonsi%2Fsi_state.c;h=c7633dcfa39b47747af5238a9a396a8b2f539c1d;hb=98a23982227dce29b015dcb5a867d05f2bee4388;hp=604f8d3456e33c5e793c80dfeac549b91f7d4e61;hpb=bea8f2f46dbc07b75762f5b88464580c49177b25;p=mesa.git diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 604f8d3456e..c7633dcfa39 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -27,15 +27,12 @@ #include "si_pipe.h" #include "si_shader.h" #include "sid.h" -#include "../radeon/r600_cs.h" +#include "radeon/r600_cs.h" -#include "tgsi/tgsi_parse.h" -#include "tgsi/tgsi_scan.h" #include "util/u_format.h" #include "util/u_format_s3tc.h" -#include "util/u_framebuffer.h" -#include "util/u_helpers.h" #include "util/u_memory.h" +#include "util/u_pstipple.h" static void si_init_atom(struct r600_atom *atom, struct r600_atom **list_elem, void (*emit)(struct si_context *ctx, struct r600_atom *state), @@ -47,15 +44,14 @@ static void si_init_atom(struct r600_atom *atom, struct r600_atom **list_elem, *list_elem = atom; } -uint32_t si_num_banks(struct si_screen *sscreen, unsigned bpe, unsigned tile_split, - unsigned tile_mode_index) +uint32_t si_num_banks(struct si_screen *sscreen, struct r600_texture *tex) { - if ((sscreen->b.chip_class == CIK) && + if (sscreen->b.chip_class == CIK && sscreen->b.info.cik_macrotile_mode_array_valid) { unsigned index, tileb; - tileb = 8 * 8 * bpe; - tileb = MIN2(tile_split, tileb); + tileb = 8 * 8 * tex->surface.bpe; + tileb = MIN2(tex->surface.tile_split, tileb); for (index = 0; tileb > 64; index++) { tileb >>= 1; @@ -65,11 +61,14 @@ uint32_t si_num_banks(struct si_screen *sscreen, unsigned bpe, unsigned tile_spl return (sscreen->b.info.cik_macrotile_mode_array[index] >> 6) & 0x3; } - if ((sscreen->b.chip_class == SI) && + if (sscreen->b.chip_class == SI && sscreen->b.info.si_tile_mode_array_valid) { + /* Don't use stencil_tiling_index, because num_banks is always + * read from the depth mode. */ + unsigned tile_mode_index = tex->surface.tiling_index[0]; assert(tile_mode_index < 32); - return (sscreen->b.info.si_tile_mode_array[tile_mode_index] >> 20) & 0x3; + return G_009910_NUM_BANKS(sscreen->b.info.si_tile_mode_array[tile_mode_index]); } /* The old way. */ @@ -212,25 +211,33 @@ static unsigned si_pack_float_12p4(float x) } /* - * inferred framebuffer and blender state + * Inferred framebuffer and blender state. + * + * One of the reasons this must be derived from the framebuffer state is that: + * - The blend state mask is 0xf most of the time. + * - The COLOR1 format isn't INVALID because of possible dual-source blending, + * so COLOR1 is enabled pretty much all the time. + * So CB_TARGET_MASK is the only register that can disable COLOR1. */ static void si_update_fb_blend_state(struct si_context *sctx) { struct si_pm4_state *pm4; struct si_state_blend *blend = sctx->queued.named.blend; - uint32_t mask; + uint32_t mask = 0, i; if (blend == NULL) return; - pm4 = si_pm4_alloc_state(sctx); + pm4 = CALLOC_STRUCT(si_pm4_state); if (pm4 == NULL) return; - mask = (1ULL << ((unsigned)sctx->framebuffer.state.nr_cbufs * 4)) - 1; + for (i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) + if (sctx->framebuffer.state.cbufs[i]) + mask |= 0xf << (4*i); mask &= blend->cb_target_mask; - si_pm4_set_reg(pm4, R_028238_CB_TARGET_MASK, mask); + si_pm4_set_reg(pm4, R_028238_CB_TARGET_MASK, mask); si_pm4_set_state(sctx, fb_blend, pm4); } @@ -404,7 +411,7 @@ static void si_set_blend_color(struct pipe_context *ctx, const struct pipe_blend_color *state) { struct si_context *sctx = (struct si_context *)ctx; - struct si_pm4_state *pm4 = si_pm4_alloc_state(sctx); + struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state); if (pm4 == NULL) return; @@ -425,7 +432,7 @@ static void si_set_clip_state(struct pipe_context *ctx, const struct pipe_clip_state *state) { struct si_context *sctx = (struct si_context *)ctx; - struct si_pm4_state *pm4 = si_pm4_alloc_state(sctx); + struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state); struct pipe_constant_buffer cb; if (pm4 == NULL) @@ -452,24 +459,55 @@ static void si_set_clip_state(struct pipe_context *ctx, si_pm4_set_state(sctx, clip, pm4); } +#define SIX_BITS 0x3F + +static void si_emit_clip_regs(struct si_context *sctx, struct r600_atom *atom) +{ + struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs; + struct tgsi_shader_info *info = si_get_vs_info(sctx); + unsigned window_space = + info->properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION]; + unsigned clipdist_mask = + info->writes_clipvertex ? SIX_BITS : info->clipdist_writemask; + + r600_write_context_reg(cs, R_02881C_PA_CL_VS_OUT_CNTL, + S_02881C_USE_VTX_POINT_SIZE(info->writes_psize) | + S_02881C_USE_VTX_EDGE_FLAG(info->writes_edgeflag) | + S_02881C_USE_VTX_RENDER_TARGET_INDX(info->writes_layer) | + S_02881C_VS_OUT_CCDIST0_VEC_ENA((clipdist_mask & 0x0F) != 0) | + S_02881C_VS_OUT_CCDIST1_VEC_ENA((clipdist_mask & 0xF0) != 0) | + S_02881C_VS_OUT_MISC_VEC_ENA(info->writes_psize || + info->writes_edgeflag || + info->writes_layer) | + (sctx->queued.named.rasterizer->clip_plane_enable & + clipdist_mask)); + r600_write_context_reg(cs, R_028810_PA_CL_CLIP_CNTL, + sctx->queued.named.rasterizer->pa_cl_clip_cntl | + (clipdist_mask ? 0 : + sctx->queued.named.rasterizer->clip_plane_enable & SIX_BITS) | + S_028810_CLIP_DISABLE(window_space)); +} + static void si_set_scissor_states(struct pipe_context *ctx, unsigned start_slot, unsigned num_scissors, const struct pipe_scissor_state *state) { struct si_context *sctx = (struct si_context *)ctx; - struct si_pm4_state *pm4 = si_pm4_alloc_state(sctx); + struct si_state_scissor *scissor = CALLOC_STRUCT(si_state_scissor); + struct si_pm4_state *pm4 = &scissor->pm4; - if (pm4 == NULL) + if (scissor == NULL) return; + scissor->scissor = *state; si_pm4_set_reg(pm4, R_028250_PA_SC_VPORT_SCISSOR_0_TL, S_028250_TL_X(state->minx) | S_028250_TL_Y(state->miny) | S_028250_WINDOW_OFFSET_DISABLE(1)); si_pm4_set_reg(pm4, R_028254_PA_SC_VPORT_SCISSOR_0_BR, S_028254_BR_X(state->maxx) | S_028254_BR_Y(state->maxy)); - si_pm4_set_state(sctx, scissor, pm4); + si_pm4_set_state(sctx, scissor, scissor); } static void si_set_viewport_states(struct pipe_context *ctx, @@ -526,7 +564,7 @@ static void si_update_fb_rs_state(struct si_context *sctx) return; } - pm4 = si_pm4_alloc_state(sctx); + pm4 = CALLOC_STRUCT(si_pm4_state); if (pm4 == NULL) return; @@ -578,6 +616,9 @@ static void *si_create_rs_state(struct pipe_context *ctx, rs->multisample_enable = state->multisample; rs->clip_plane_enable = state->clip_plane_enable; rs->line_stipple_enable = state->line_stipple_enable; + rs->poly_stipple_enable = state->poly_stipple_enable; + rs->line_smooth = state->line_smooth; + rs->poly_smooth = state->poly_smooth; polygon_dual_mode = (state->fill_front != PIPE_POLYGON_MODE_FILL || state->fill_back != PIPE_POLYGON_MODE_FILL); @@ -595,14 +636,15 @@ static void *si_create_rs_state(struct pipe_context *ctx, S_028814_CULL_FRONT(state->rasterizer_discard || (state->cull_face & PIPE_FACE_FRONT) ? 1 : 0) | S_028814_CULL_BACK(state->rasterizer_discard || (state->cull_face & PIPE_FACE_BACK) ? 1 : 0) | S_028814_FACE(!state->front_ccw) | - S_028814_POLY_OFFSET_FRONT_ENABLE(state->offset_tri) | - S_028814_POLY_OFFSET_BACK_ENABLE(state->offset_tri) | - S_028814_POLY_OFFSET_PARA_ENABLE(state->offset_tri) | + S_028814_POLY_OFFSET_FRONT_ENABLE(util_get_offset(state, state->fill_front)) | + S_028814_POLY_OFFSET_BACK_ENABLE(util_get_offset(state, state->fill_back)) | + S_028814_POLY_OFFSET_PARA_ENABLE(state->offset_point || state->offset_line) | S_028814_POLY_MODE(polygon_dual_mode) | S_028814_POLYMODE_FRONT_PTYPE(si_translate_fill(state->fill_front)) | S_028814_POLYMODE_BACK_PTYPE(si_translate_fill(state->fill_back)); rs->pa_cl_clip_cntl = S_028810_PS_UCP_MODE(3) | + S_028810_DX_CLIP_SPACE_DEF(state->clip_halfz) | S_028810_ZCLIP_NEAR_DISABLE(!state->depth_clip) | S_028810_ZCLIP_FAR_DISABLE(!state->depth_clip) | S_028810_DX_RASTERIZATION_KILL(state->rasterizer_discard) | @@ -646,7 +688,9 @@ static void *si_create_rs_state(struct pipe_context *ctx, si_pm4_set_reg(pm4, R_028A08_PA_SU_LINE_CNTL, S_028A08_WIDTH(tmp)); si_pm4_set_reg(pm4, R_028A48_PA_SC_MODE_CNTL_0, S_028A48_LINE_STIPPLE_ENABLE(state->line_stipple_enable) | - S_028A48_MSAA_ENABLE(state->multisample) | + S_028A48_MSAA_ENABLE(state->multisample || + state->poly_smooth || + state->line_smooth) | S_028A48_VPORT_SCISSOR_ENABLE(state->scissor)); si_pm4_set_reg(pm4, R_028BE4_PA_SU_VTX_CNTL, @@ -661,18 +705,26 @@ static void *si_create_rs_state(struct pipe_context *ctx, static void si_bind_rs_state(struct pipe_context *ctx, void *state) { struct si_context *sctx = (struct si_context *)ctx; + struct si_state_rasterizer *old_rs = + (struct si_state_rasterizer*)sctx->queued.named.rasterizer; struct si_state_rasterizer *rs = (struct si_state_rasterizer *)state; if (state == NULL) return; // TODO - sctx->sprite_coord_enable = rs->sprite_coord_enable; sctx->pa_sc_line_stipple = rs->pa_sc_line_stipple; sctx->pa_su_sc_mode_cntl = rs->pa_su_sc_mode_cntl; + if (sctx->framebuffer.nr_samples > 1 && + (!old_rs || old_rs->multisample_enable != rs->multisample_enable)) + sctx->db_render_state.dirty = true; + si_pm4_bind_state(sctx, rasterizer, rs); si_update_fb_rs_state(sctx); + + sctx->clip_regs.dirty = true; + sctx->last_rast_prim = -1; /* reset this so that it gets updated */ } static void si_delete_rs_state(struct pipe_context *ctx, void *state) @@ -686,12 +738,16 @@ static void si_delete_rs_state(struct pipe_context *ctx, void *state) */ static void si_update_dsa_stencil_ref(struct si_context *sctx) { - struct si_pm4_state *pm4 = si_pm4_alloc_state(sctx); + struct si_pm4_state *pm4; struct pipe_stencil_ref *ref = &sctx->stencil_ref; - struct si_state_dsa *dsa = sctx->queued.named.dsa; + struct si_state_dsa *dsa = sctx->queued.named.dsa; - if (pm4 == NULL) - return; + if (!dsa) + return; + + pm4 = CALLOC_STRUCT(si_pm4_state); + if (pm4 == NULL) + return; si_pm4_set_reg(pm4, R_028430_DB_STENCILREFMASK, S_028430_STENCILTESTVAL(ref->ref_value[0]) | @@ -753,7 +809,6 @@ static void *si_create_dsa_state(struct pipe_context *ctx, struct si_state_dsa *dsa = CALLOC_STRUCT(si_state_dsa); struct si_pm4_state *pm4 = &dsa->pm4; unsigned db_depth_control; - unsigned db_render_control; uint32_t db_stencil_control = 0; if (dsa == NULL) { @@ -789,18 +844,15 @@ static void *si_create_dsa_state(struct pipe_context *ctx, /* alpha */ if (state->alpha.enabled) { dsa->alpha_func = state->alpha.func; - dsa->alpha_ref = state->alpha.ref_value; si_pm4_set_reg(pm4, R_00B030_SPI_SHADER_USER_DATA_PS_0 + - SI_SGPR_ALPHA_REF * 4, fui(dsa->alpha_ref)); + SI_SGPR_ALPHA_REF * 4, fui(state->alpha.ref_value)); } else { dsa->alpha_func = PIPE_FUNC_ALWAYS; } /* misc */ - db_render_control = 0; si_pm4_set_reg(pm4, R_028800_DB_DEPTH_CONTROL, db_depth_control); - si_pm4_set_reg(pm4, R_028000_DB_RENDER_CONTROL, db_render_control); si_pm4_set_reg(pm4, R_02842C_DB_STENCIL_CONTROL, db_stencil_control); return dsa; @@ -824,28 +876,94 @@ static void si_delete_dsa_state(struct pipe_context *ctx, void *state) si_pm4_delete_state(sctx, dsa, (struct si_state_dsa *)state); } -static void *si_create_db_flush_dsa(struct si_context *sctx, bool copy_depth, - bool copy_stencil, int sample) +static void *si_create_db_flush_dsa(struct si_context *sctx) { - struct pipe_depth_stencil_alpha_state dsa; - struct si_state_dsa *state; + struct pipe_depth_stencil_alpha_state dsa = {}; - memset(&dsa, 0, sizeof(dsa)); + return sctx->b.b.create_depth_stencil_alpha_state(&sctx->b.b, &dsa); +} + +/* DB RENDER STATE */ + +static void si_set_occlusion_query_state(struct pipe_context *ctx, bool enable) +{ + struct si_context *sctx = (struct si_context*)ctx; - state = sctx->b.b.create_depth_stencil_alpha_state(&sctx->b.b, &dsa); - if (copy_depth || copy_stencil) { - si_pm4_set_reg(&state->pm4, R_028000_DB_RENDER_CONTROL, - S_028000_DEPTH_COPY(copy_depth) | - S_028000_STENCIL_COPY(copy_stencil) | - S_028000_COPY_CENTROID(1) | - S_028000_COPY_SAMPLE(sample)); + sctx->db_render_state.dirty = true; +} + +static void si_emit_db_render_state(struct si_context *sctx, struct r600_atom *state) +{ + struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs; + struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; + unsigned db_shader_control; + + r600_write_context_reg_seq(cs, R_028000_DB_RENDER_CONTROL, 2); + + /* DB_RENDER_CONTROL */ + if (sctx->dbcb_depth_copy_enabled || + sctx->dbcb_stencil_copy_enabled) { + radeon_emit(cs, + S_028000_DEPTH_COPY(sctx->dbcb_depth_copy_enabled) | + S_028000_STENCIL_COPY(sctx->dbcb_stencil_copy_enabled) | + S_028000_COPY_CENTROID(1) | + S_028000_COPY_SAMPLE(sctx->dbcb_copy_sample)); + } else if (sctx->db_inplace_flush_enabled) { + radeon_emit(cs, + S_028000_DEPTH_COMPRESS_DISABLE(1) | + S_028000_STENCIL_COMPRESS_DISABLE(1)); + } else if (sctx->db_depth_clear) { + radeon_emit(cs, S_028000_DEPTH_CLEAR_ENABLE(1)); } else { - si_pm4_set_reg(&state->pm4, R_028000_DB_RENDER_CONTROL, - S_028000_DEPTH_COMPRESS_DISABLE(1) | - S_028000_STENCIL_COMPRESS_DISABLE(1)); + radeon_emit(cs, 0); } - return state; + /* DB_COUNT_CONTROL (occlusion queries) */ + if (sctx->b.num_occlusion_queries > 0) { + if (sctx->b.chip_class >= CIK) { + radeon_emit(cs, + S_028004_PERFECT_ZPASS_COUNTS(1) | + S_028004_SAMPLE_RATE(sctx->framebuffer.log_samples) | + S_028004_ZPASS_ENABLE(1) | + S_028004_SLICE_EVEN_ENABLE(1) | + S_028004_SLICE_ODD_ENABLE(1)); + } else { + radeon_emit(cs, + S_028004_PERFECT_ZPASS_COUNTS(1) | + S_028004_SAMPLE_RATE(sctx->framebuffer.log_samples)); + } + } else { + /* Disable occlusion queries. */ + if (sctx->b.chip_class >= CIK) { + radeon_emit(cs, 0); + } else { + radeon_emit(cs, S_028004_ZPASS_INCREMENT_DISABLE(1)); + } + } + + /* DB_RENDER_OVERRIDE2 */ + if (sctx->db_depth_disable_expclear) { + r600_write_context_reg(cs, R_028010_DB_RENDER_OVERRIDE2, + S_028010_DISABLE_ZMASK_EXPCLEAR_OPTIMIZATION(1)); + } else { + r600_write_context_reg(cs, R_028010_DB_RENDER_OVERRIDE2, 0); + } + + db_shader_control = S_02880C_ALPHA_TO_MASK_DISABLE(sctx->framebuffer.cb0_is_integer) | + sctx->ps_db_shader_control; + + /* Bug workaround for smoothing (overrasterization) on SI. */ + if (sctx->b.chip_class == SI && sctx->smoothing_enabled) + db_shader_control |= S_02880C_Z_ORDER(V_02880C_LATE_Z); + else + db_shader_control |= S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z); + + /* Disable the gl_SampleMask fragment shader output if MSAA is disabled. */ + if (sctx->framebuffer.nr_samples <= 1 || (rs && !rs->multisample_enable)) + db_shader_control &= C_02880C_MASK_EXPORT_ENABLE; + + r600_write_context_reg(cs, R_02880C_DB_SHADER_CONTROL, + db_shader_control); } /* @@ -1102,6 +1220,22 @@ static uint32_t si_translate_texformat(struct pipe_screen *screen, } } + if (desc->layout == UTIL_FORMAT_LAYOUT_BPTC) { + if (!enable_s3tc) + goto out_unknown; + + switch (format) { + case PIPE_FORMAT_BPTC_RGBA_UNORM: + case PIPE_FORMAT_BPTC_SRGBA: + return V_008F14_IMG_DATA_FORMAT_BC7; + case PIPE_FORMAT_BPTC_RGB_FLOAT: + case PIPE_FORMAT_BPTC_RGB_UFLOAT: + return V_008F14_IMG_DATA_FORMAT_BC6; + default: + goto out_unknown; + } + } + if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) { switch (format) { case PIPE_FORMAT_R8G8_B8G8_UNORM: @@ -1512,13 +1646,17 @@ boolean si_is_format_supported(struct pipe_screen *screen, if ((usage & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_SCANOUT | - PIPE_BIND_SHARED)) && + PIPE_BIND_SHARED | + PIPE_BIND_BLENDABLE)) && si_is_colorbuffer_format_supported(format)) { retval |= usage & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_SCANOUT | PIPE_BIND_SHARED); + if (!util_format_is_pure_integer(format) && + !util_format_is_depth_or_stencil(format)) + retval |= usage & PIPE_BIND_BLENDABLE; } if ((usage & PIPE_BIND_DEPTH_STENCIL) && @@ -1680,7 +1818,7 @@ static void si_initialize_color_surface(struct si_context *sctx, } } - offset += r600_resource_va(sctx->b.b.screen, surf->base.texture); + offset += rtex->resource.gpu_address; surf->cb_color_base = offset >> 8; surf->cb_color_pitch = color_pitch; @@ -1726,7 +1864,8 @@ static void si_init_depth_surface(struct si_context *sctx, struct si_screen *sscreen = sctx->screen; struct r600_texture *rtex = (struct r600_texture*)surf->base.texture; unsigned level = surf->base.u.tex.level; - unsigned pitch, slice, format, tile_mode_index, array_mode; + struct radeon_surface_level *levelinfo = &rtex->surface.level[level]; + unsigned format, tile_mode_index, array_mode; unsigned macro_aspect, tile_split, stile_split, bankh, bankw, nbanks, pipe_config; uint32_t z_info, s_info, db_depth_info; uint64_t z_offs, s_offs; @@ -1758,16 +1897,10 @@ static void si_init_depth_surface(struct si_context *sctx, } assert(format != V_028040_Z_INVALID); - s_offs = z_offs = r600_resource_va(sctx->b.b.screen, surf->base.texture); + s_offs = z_offs = rtex->resource.gpu_address; z_offs += rtex->surface.level[level].offset; s_offs += rtex->surface.stencil_level[level].offset; - pitch = (rtex->surface.level[level].nblk_x / 8) - 1; - slice = (rtex->surface.level[level].nblk_x * rtex->surface.level[level].nblk_y) / 64; - if (slice) { - slice = slice - 1; - } - db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(1); z_info = S_028040_FORMAT(format); @@ -1802,8 +1935,7 @@ static void si_init_depth_surface(struct si_context *sctx, macro_aspect = cik_macro_tile_aspect(macro_aspect); bankw = cik_bank_wh(bankw); bankh = cik_bank_wh(bankh); - nbanks = si_num_banks(sscreen, rtex->surface.bpe, rtex->surface.tile_split, - ~0); + nbanks = si_num_banks(sscreen, rtex); tile_mode_index = si_tile_mode_index(rtex, level, false); pipe_config = cik_db_pipe_config(sscreen, tile_mode_index); @@ -1825,9 +1957,8 @@ static void si_init_depth_surface(struct si_context *sctx, /* HiZ aka depth buffer htile */ /* use htile only for first level */ if (rtex->htile_buffer && !level) { - const struct util_format_description *fmt_desc; - - z_info |= S_028040_TILE_SURFACE_ENABLE(1); + z_info |= S_028040_TILE_SURFACE_ENABLE(1) | + S_028040_ALLOW_EXPCLEAR(1); /* This is optimal for the clear value of 1.0 and using * the LESS and LEQUAL test functions. Set this to 0 @@ -1835,13 +1966,11 @@ static void si_init_depth_surface(struct si_context *sctx, * clearing. */ z_info |= S_028040_ZRANGE_PRECISION(1); - fmt_desc = util_format_description(rtex->resource.b.b.format); - if (!util_format_has_stencil(fmt_desc)) { - /* Use all of the htile_buffer for depth */ - s_info |= S_028044_TILE_STENCIL_DISABLE(1); - } + /* Use all of the htile_buffer for depth, because we don't + * use HTILE for stencil because of FAST_STENCIL_DISABLE. */ + s_info |= S_028044_TILE_STENCIL_DISABLE(1); - uint64_t va = r600_resource_va(&sctx->screen->b.b, &rtex->htile_buffer->b.b); + uint64_t va = rtex->htile_buffer->gpu_address; db_htile_data_base = va >> 8; db_htile_surface = S_028ABC_FULL_CACHE(1); } else { @@ -1849,6 +1978,8 @@ static void si_init_depth_surface(struct si_context *sctx, db_htile_surface = 0; } + assert(levelinfo->nblk_x % 8 == 0 && levelinfo->nblk_y % 8 == 0); + surf->db_depth_view = S_028008_SLICE_START(surf->base.u.tex.first_layer) | S_028008_SLICE_MAX(surf->base.u.tex.last_layer); surf->db_htile_data_base = db_htile_data_base; @@ -1857,8 +1988,10 @@ static void si_init_depth_surface(struct si_context *sctx, surf->db_stencil_info = s_info; surf->db_depth_base = z_offs >> 8; surf->db_stencil_base = s_offs >> 8; - surf->db_depth_size = S_028058_PITCH_TILE_MAX(pitch); - surf->db_depth_slice = S_02805C_SLICE_TILE_MAX(slice); + surf->db_depth_size = S_028058_PITCH_TILE_MAX((levelinfo->nblk_x / 8) - 1) | + S_028058_HEIGHT_TILE_MAX((levelinfo->nblk_y / 8) - 1); + surf->db_depth_slice = S_02805C_SLICE_TILE_MAX((levelinfo->nblk_x * + levelinfo->nblk_y) / 64 - 1); surf->db_htile_surface = db_htile_surface; surf->pa_su_poly_offset_db_fmt_cntl = pa_su_poly_offset_db_fmt_cntl; @@ -1872,16 +2005,20 @@ static void si_set_framebuffer_state(struct pipe_context *ctx, struct pipe_constant_buffer constbuf = {0}; struct r600_surface *surf = NULL; struct r600_texture *rtex; + bool old_cb0_is_integer = sctx->framebuffer.cb0_is_integer; + unsigned old_nr_samples = sctx->framebuffer.nr_samples; int i; - if (sctx->framebuffer.state.nr_cbufs) { - sctx->b.flags |= R600_CONTEXT_FLUSH_AND_INV_CB | - R600_CONTEXT_FLUSH_AND_INV_CB_META; - } - if (sctx->framebuffer.state.zsbuf) { - sctx->b.flags |= R600_CONTEXT_FLUSH_AND_INV_DB | - R600_CONTEXT_FLUSH_AND_INV_DB_META; - } + /* Only flush TC when changing the framebuffer state, because + * the only client not using TC that can change textures is + * the framebuffer. + * + * Flush all CB and DB caches here because all buffers can be used + * for write by both TC (with shader image stores) and CB/DB. + */ + sctx->b.flags |= SI_CONTEXT_INV_TC_L1 | + SI_CONTEXT_INV_TC_L2 | + SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER; util_copy_framebuffer_state(&sctx->framebuffer.state, state); @@ -1892,6 +2029,9 @@ static void si_set_framebuffer_state(struct pipe_context *ctx, sctx->framebuffer.cb0_is_integer = state->nr_cbufs && state->cbufs[0] && util_format_is_pure_integer(state->cbufs[0]->format); + if (sctx->framebuffer.cb0_is_integer != old_cb0_is_integer) + sctx->db_render_state.dirty = true; + for (i = 0; i < state->nr_cbufs; i++) { if (!state->cbufs[i]) continue; @@ -1930,35 +2070,52 @@ static void si_set_framebuffer_state(struct pipe_context *ctx, si_update_fb_blend_state(sctx); sctx->framebuffer.atom.num_dw = state->nr_cbufs*15 + (8 - state->nr_cbufs)*3; - sctx->framebuffer.atom.num_dw += state->zsbuf ? 23 : 4; + sctx->framebuffer.atom.num_dw += state->zsbuf ? 26 : 4; sctx->framebuffer.atom.num_dw += 3; /* WINDOW_SCISSOR_BR */ sctx->framebuffer.atom.num_dw += 18; /* MSAA sample locations */ sctx->framebuffer.atom.dirty = true; - sctx->msaa_config.dirty = true; - /* Set sample locations as fragment shader constants. */ - switch (sctx->framebuffer.nr_samples) { - case 1: - constbuf.user_buffer = sctx->b.sample_locations_1x; - break; - case 2: - constbuf.user_buffer = sctx->b.sample_locations_2x; - break; - case 4: - constbuf.user_buffer = sctx->b.sample_locations_4x; - break; - case 8: - constbuf.user_buffer = sctx->b.sample_locations_8x; - break; - case 16: - constbuf.user_buffer = sctx->b.sample_locations_16x; - break; - default: - assert(0); + if (sctx->framebuffer.nr_samples != old_nr_samples) { + sctx->msaa_config.dirty = true; + sctx->db_render_state.dirty = true; + + /* Set sample locations as fragment shader constants. */ + switch (sctx->framebuffer.nr_samples) { + case 1: + constbuf.user_buffer = sctx->b.sample_locations_1x; + break; + case 2: + constbuf.user_buffer = sctx->b.sample_locations_2x; + break; + case 4: + constbuf.user_buffer = sctx->b.sample_locations_4x; + break; + case 8: + constbuf.user_buffer = sctx->b.sample_locations_8x; + break; + case 16: + constbuf.user_buffer = sctx->b.sample_locations_16x; + break; + default: + assert(0); + } + constbuf.buffer_size = sctx->framebuffer.nr_samples * 2 * 4; + ctx->set_constant_buffer(ctx, PIPE_SHADER_FRAGMENT, + SI_DRIVER_STATE_CONST_BUF, &constbuf); + + /* Smoothing (only possible with nr_samples == 1) uses the same + * sample locations as the MSAA it simulates. + * + * Therefore, don't update the sample locations when + * transitioning from no AA to smoothing-equivalent AA, and + * vice versa. + */ + if ((sctx->framebuffer.nr_samples != 1 || + old_nr_samples != SI_NUM_SMOOTH_AA_SAMPLES) && + (sctx->framebuffer.nr_samples != SI_NUM_SMOOTH_AA_SAMPLES || + old_nr_samples != 1)) + sctx->msaa_sample_locs.dirty = true; } - constbuf.buffer_size = sctx->framebuffer.nr_samples * 2 * 4; - ctx->set_constant_buffer(ctx, PIPE_SHADER_FRAGMENT, - SI_DRIVER_STATE_CONST_BUF, &constbuf); } static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom *atom) @@ -2048,6 +2205,7 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom radeon_emit(cs, zb->db_depth_slice); /* R_02805C_DB_DEPTH_SLICE */ r600_write_context_reg(cs, R_028ABC_DB_HTILE_SURFACE, zb->db_htile_surface); + r600_write_context_reg(cs, R_02802C_DB_DEPTH_CLEAR, fui(rtex->depth_clear_value)); r600_write_context_reg(cs, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL, zb->pa_su_poly_offset_db_fmt_cntl); } else { @@ -2060,17 +2218,29 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom /* PA_SC_WINDOW_SCISSOR_TL is set in si_init_config() */ r600_write_context_reg(cs, R_028208_PA_SC_WINDOW_SCISSOR_BR, S_028208_BR_X(state->width) | S_028208_BR_Y(state->height)); +} - cayman_emit_msaa_sample_locs(cs, sctx->framebuffer.nr_samples); +static void si_emit_msaa_sample_locs(struct r600_common_context *rctx, + struct r600_atom *atom) +{ + struct si_context *sctx = (struct si_context *)rctx; + struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs; + unsigned nr_samples = sctx->framebuffer.nr_samples; + + cayman_emit_msaa_sample_locs(cs, nr_samples > 1 ? nr_samples : + SI_NUM_SMOOTH_AA_SAMPLES); } +const struct r600_atom si_atom_msaa_sample_locs = { si_emit_msaa_sample_locs, 18 }; /* number of CS dwords */ + static void si_emit_msaa_config(struct r600_common_context *rctx, struct r600_atom *atom) { struct si_context *sctx = (struct si_context *)rctx; struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs; cayman_emit_msaa_config(cs, sctx->framebuffer.nr_samples, - sctx->ps_iter_samples); + sctx->ps_iter_samples, + sctx->smoothing_enabled ? SI_NUM_SMOOTH_AA_SAMPLES : 0); } const struct r600_atom si_atom_msaa_config = { si_emit_msaa_config, 10 }; /* number of CS dwords */ @@ -2088,268 +2258,6 @@ static void si_set_min_samples(struct pipe_context *ctx, unsigned min_samples) sctx->msaa_config.dirty = true; } -/* - * shaders - */ - -/* Compute the key for the hw shader variant */ -static INLINE void si_shader_selector_key(struct pipe_context *ctx, - struct si_pipe_shader_selector *sel, - union si_shader_key *key) -{ - struct si_context *sctx = (struct si_context *)ctx; - memset(key, 0, sizeof(*key)); - - if ((sel->type == PIPE_SHADER_VERTEX || sel->type == PIPE_SHADER_GEOMETRY) && - sctx->queued.named.rasterizer) { - if (sctx->queued.named.rasterizer->clip_plane_enable & 0xf0) - key->vs.ucps_enabled |= 0x2; - if (sctx->queued.named.rasterizer->clip_plane_enable & 0xf) - key->vs.ucps_enabled |= 0x1; - } - - if (sel->type == PIPE_SHADER_VERTEX) { - unsigned i; - if (!sctx->vertex_elements) - return; - - for (i = 0; i < sctx->vertex_elements->count; ++i) - key->vs.instance_divisors[i] = sctx->vertex_elements->elements[i].instance_divisor; - - key->vs.as_es = sctx->gs_shader != NULL; - } else if (sel->type == PIPE_SHADER_FRAGMENT) { - if (sel->fs_write_all) - key->ps.nr_cbufs = sctx->framebuffer.state.nr_cbufs; - key->ps.export_16bpc = sctx->framebuffer.export_16bpc; - - if (sctx->queued.named.rasterizer) { - key->ps.color_two_side = sctx->queued.named.rasterizer->two_side; - key->ps.flatshade = sctx->queued.named.rasterizer->flatshade; - key->ps.interp_at_sample = sctx->framebuffer.nr_samples > 1 && - sctx->ps_iter_samples == sctx->framebuffer.nr_samples; - - if (sctx->queued.named.blend) { - key->ps.alpha_to_one = sctx->queued.named.blend->alpha_to_one && - sctx->queued.named.rasterizer->multisample_enable && - !sctx->framebuffer.cb0_is_integer; - } - } - if (sctx->queued.named.dsa) { - key->ps.alpha_func = sctx->queued.named.dsa->alpha_func; - - /* Alpha-test should be disabled if colorbuffer 0 is integer. */ - if (sctx->framebuffer.cb0_is_integer) - key->ps.alpha_func = PIPE_FUNC_ALWAYS; - } else { - key->ps.alpha_func = PIPE_FUNC_ALWAYS; - } - } -} - -/* Select the hw shader variant depending on the current state. */ -int si_shader_select(struct pipe_context *ctx, - struct si_pipe_shader_selector *sel) -{ - union si_shader_key key; - struct si_pipe_shader * shader = NULL; - int r; - - si_shader_selector_key(ctx, sel, &key); - - /* Check if we don't need to change anything. - * This path is also used for most shaders that don't need multiple - * variants, it will cost just a computation of the key and this - * test. */ - if (likely(sel->current && memcmp(&sel->current->key, &key, sizeof(key)) == 0)) { - return 0; - } - - /* lookup if we have other variants in the list */ - if (sel->num_shaders > 1) { - struct si_pipe_shader *p = sel->current, *c = p->next_variant; - - while (c && memcmp(&c->key, &key, sizeof(key)) != 0) { - p = c; - c = c->next_variant; - } - - if (c) { - p->next_variant = c->next_variant; - shader = c; - } - } - - if (shader) { - shader->next_variant = sel->current; - sel->current = shader; - } else { - shader = CALLOC(1, sizeof(struct si_pipe_shader)); - shader->selector = sel; - shader->key = key; - - shader->next_variant = sel->current; - sel->current = shader; - r = si_pipe_shader_create(ctx, shader); - if (unlikely(r)) { - R600_ERR("Failed to build shader variant (type=%u) %d\n", - sel->type, r); - sel->current = NULL; - FREE(shader); - return r; - } - sel->num_shaders++; - } - - return 0; -} - -static void *si_create_shader_state(struct pipe_context *ctx, - const struct pipe_shader_state *state, - unsigned pipe_shader_type) -{ - struct si_pipe_shader_selector *sel = CALLOC_STRUCT(si_pipe_shader_selector); - int r; - - sel->type = pipe_shader_type; - sel->tokens = tgsi_dup_tokens(state->tokens); - sel->so = state->stream_output; - - if (pipe_shader_type == PIPE_SHADER_FRAGMENT) { - struct tgsi_shader_info info; - - tgsi_scan_shader(state->tokens, &info); - sel->fs_write_all = info.color0_writes_all_cbufs; - } - - r = si_shader_select(ctx, sel); - if (r) { - free(sel); - return NULL; - } - - return sel; -} - -static void *si_create_fs_state(struct pipe_context *ctx, - const struct pipe_shader_state *state) -{ - return si_create_shader_state(ctx, state, PIPE_SHADER_FRAGMENT); -} - -static void *si_create_gs_state(struct pipe_context *ctx, - const struct pipe_shader_state *state) -{ - return si_create_shader_state(ctx, state, PIPE_SHADER_GEOMETRY); -} - -static void *si_create_vs_state(struct pipe_context *ctx, - const struct pipe_shader_state *state) -{ - return si_create_shader_state(ctx, state, PIPE_SHADER_VERTEX); -} - -static void si_bind_vs_shader(struct pipe_context *ctx, void *state) -{ - struct si_context *sctx = (struct si_context *)ctx; - struct si_pipe_shader_selector *sel = state; - - if (sctx->vs_shader == sel) - return; - - if (!sel || !sel->current) - return; - - sctx->vs_shader = sel; -} - -static void si_bind_gs_shader(struct pipe_context *ctx, void *state) -{ - struct si_context *sctx = (struct si_context *)ctx; - struct si_pipe_shader_selector *sel = state; - - if (sctx->gs_shader == sel) - return; - - sctx->gs_shader = sel; -} - -static void si_bind_ps_shader(struct pipe_context *ctx, void *state) -{ - struct si_context *sctx = (struct si_context *)ctx; - struct si_pipe_shader_selector *sel = state; - - /* skip if supplied shader is one already in use */ - if (sctx->ps_shader == sel) - return; - - /* use dummy shader if supplied shader is corrupt */ - if (!sel || !sel->current) - sel = sctx->dummy_pixel_shader; - - sctx->ps_shader = sel; -} - -static void si_delete_shader_selector(struct pipe_context *ctx, - struct si_pipe_shader_selector *sel) -{ - struct si_context *sctx = (struct si_context *)ctx; - struct si_pipe_shader *p = sel->current, *c; - - while (p) { - c = p->next_variant; - if (sel->type == PIPE_SHADER_GEOMETRY) - si_pm4_delete_state(sctx, gs, p->pm4); - else if (sel->type == PIPE_SHADER_FRAGMENT) - si_pm4_delete_state(sctx, ps, p->pm4); - else if (p->key.vs.as_es) - si_pm4_delete_state(sctx, es, p->pm4); - else - si_pm4_delete_state(sctx, vs, p->pm4); - si_pipe_shader_destroy(ctx, p); - free(p); - p = c; - } - - free(sel->tokens); - free(sel); - } - -static void si_delete_vs_shader(struct pipe_context *ctx, void *state) -{ - struct si_context *sctx = (struct si_context *)ctx; - struct si_pipe_shader_selector *sel = (struct si_pipe_shader_selector *)state; - - if (sctx->vs_shader == sel) { - sctx->vs_shader = NULL; - } - - si_delete_shader_selector(ctx, sel); -} - -static void si_delete_gs_shader(struct pipe_context *ctx, void *state) -{ - struct si_context *sctx = (struct si_context *)ctx; - struct si_pipe_shader_selector *sel = (struct si_pipe_shader_selector *)state; - - if (sctx->gs_shader == sel) { - sctx->gs_shader = NULL; - } - - si_delete_shader_selector(ctx, sel); -} - -static void si_delete_ps_shader(struct pipe_context *ctx, void *state) -{ - struct si_context *sctx = (struct si_context *)ctx; - struct si_pipe_shader_selector *sel = (struct si_pipe_shader_selector *)state; - - if (sctx->ps_shader == sel) { - sctx->ps_shader = NULL; - } - - si_delete_shader_selector(ctx, sel); -} - /* * Samplers */ @@ -2358,7 +2266,8 @@ static struct pipe_sampler_view *si_create_sampler_view(struct pipe_context *ctx struct pipe_resource *texture, const struct pipe_sampler_view *state) { - struct si_pipe_sampler_view *view = CALLOC_STRUCT(si_pipe_sampler_view); + struct si_context *sctx = (struct si_context*)ctx; + struct si_sampler_view *view = CALLOC_STRUCT(si_sampler_view); struct r600_texture *tmp = (struct r600_texture*)texture; const struct util_format_description *desc; unsigned format, num_format; @@ -2376,9 +2285,20 @@ static struct pipe_sampler_view *si_create_sampler_view(struct pipe_context *ctx /* initialize base object */ view->base = *state; view->base.texture = NULL; - pipe_resource_reference(&view->base.texture, texture); view->base.reference.count = 1; view->base.context = ctx; + + /* NULL resource, obey swizzle (only ZERO and ONE make sense). */ + if (!texture) { + view->state[3] = S_008F1C_DST_SEL_X(si_map_swizzle(state->swizzle_r)) | + S_008F1C_DST_SEL_Y(si_map_swizzle(state->swizzle_g)) | + S_008F1C_DST_SEL_Z(si_map_swizzle(state->swizzle_b)) | + S_008F1C_DST_SEL_W(si_map_swizzle(state->swizzle_a)) | + S_008F1C_TYPE(V_008F1C_SQ_RSRC_IMG_1D); + return &view->base; + } + + pipe_resource_reference(&view->base.texture, texture); view->resource = &tmp->resource; /* Buffer resource. */ @@ -2388,20 +2308,22 @@ static struct pipe_sampler_view *si_create_sampler_view(struct pipe_context *ctx desc = util_format_description(state->format); first_non_void = util_format_get_first_non_void_channel(state->format); stride = desc->block.bits / 8; - va = r600_resource_va(ctx->screen, texture) + state->u.buf.first_element*stride; + va = tmp->resource.gpu_address + state->u.buf.first_element*stride; format = si_translate_buffer_dataformat(ctx->screen, desc, first_non_void); num_format = si_translate_buffer_numformat(ctx->screen, desc, first_non_void); - view->state[0] = va; - view->state[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) | + view->state[4] = va; + view->state[5] = S_008F04_BASE_ADDRESS_HI(va >> 32) | S_008F04_STRIDE(stride); - view->state[2] = state->u.buf.last_element + 1 - state->u.buf.first_element; - view->state[3] = S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) | + view->state[6] = state->u.buf.last_element + 1 - state->u.buf.first_element; + view->state[7] = S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) | S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) | S_008F0C_DST_SEL_Z(si_map_swizzle(desc->swizzle[2])) | S_008F0C_DST_SEL_W(si_map_swizzle(desc->swizzle[3])) | S_008F0C_NUM_FORMAT(num_format) | S_008F0C_DATA_FORMAT(format); + + LIST_ADDTAIL(&view->list, &sctx->b.texture_buffers); return &view->base; } @@ -2467,12 +2389,16 @@ static struct pipe_sampler_view *si_create_sampler_view(struct pipe_context *ctx case PIPE_FORMAT_DXT1_SRGBA: case PIPE_FORMAT_DXT3_SRGBA: case PIPE_FORMAT_DXT5_SRGBA: + case PIPE_FORMAT_BPTC_SRGBA: num_format = V_008F14_IMG_NUM_FORMAT_SRGB; break; case PIPE_FORMAT_RGTC1_SNORM: case PIPE_FORMAT_LATC1_SNORM: case PIPE_FORMAT_RGTC2_SNORM: case PIPE_FORMAT_LATC2_SNORM: + /* implies float, so use SNORM/UNORM to determine + whether data is signed or not */ + case PIPE_FORMAT_BPTC_RGB_FLOAT: num_format = V_008F14_IMG_NUM_FORMAT_SNORM; break; default: @@ -2533,8 +2459,7 @@ static struct pipe_sampler_view *si_create_sampler_view(struct pipe_context *ctx } else if (texture->target == PIPE_TEXTURE_CUBE_ARRAY) depth = texture->array_size / 6; - va = r600_resource_va(ctx->screen, texture); - va += surflevel[0].offset; + va = tmp->resource.gpu_address + surflevel[0].offset; va += tmp->mipmap_shift * surflevel[texture->last_level].slice_size * tmp->surface.array_size; view->state[0] = va >> 8; @@ -2563,7 +2488,7 @@ static struct pipe_sampler_view *si_create_sampler_view(struct pipe_context *ctx /* Initialize the sampler view for FMASK. */ if (tmp->fmask.size) { - uint64_t va = r600_resource_va(ctx->screen, texture) + tmp->fmask.offset; + uint64_t va = tmp->resource.gpu_address + tmp->fmask.offset; uint32_t fmask_format; switch (texture->nr_samples) { @@ -2607,10 +2532,13 @@ static struct pipe_sampler_view *si_create_sampler_view(struct pipe_context *ctx static void si_sampler_view_destroy(struct pipe_context *ctx, struct pipe_sampler_view *state) { - struct r600_pipe_sampler_view *resource = (struct r600_pipe_sampler_view *)state; + struct si_sampler_view *view = (struct si_sampler_view *)state; + + if (view->resource && view->resource->b.b.target == PIPE_BUFFER) + LIST_DELINIT(&view->list); pipe_resource_reference(&state->texture, NULL); - FREE(resource); + FREE(view); } static bool wrap_mode_uses_border_color(unsigned wrap, bool linear_filter) @@ -2637,7 +2565,7 @@ static bool sampler_state_needs_border_color(const struct pipe_sampler_state *st static void *si_create_sampler_state(struct pipe_context *ctx, const struct pipe_sampler_state *state) { - struct si_pipe_sampler_state *rstate = CALLOC_STRUCT(si_pipe_sampler_state); + struct si_sampler_state *rstate = CALLOC_STRUCT(si_sampler_state); unsigned aniso_flag_offset = state->max_anisotropy > 1 ? 2 : 0; unsigned border_color_type; @@ -2681,7 +2609,7 @@ static void *si_create_sampler_state(struct pipe_context *ctx, static void si_set_border_colors(struct si_context *sctx, unsigned count, void **states) { - struct si_pipe_sampler_state **rstates = (struct si_pipe_sampler_state **)states; + struct si_sampler_state **rstates = (struct si_sampler_state **)states; uint32_t *border_color_table = NULL; int i, j; @@ -2697,7 +2625,7 @@ static void si_set_border_colors(struct si_context *sctx, unsigned count, sctx->border_color_table = si_resource_create_custom(&sctx->screen->b.b, - PIPE_USAGE_STAGING, + PIPE_USAGE_DYNAMIC, 4096 * 4 * 4); } @@ -2720,11 +2648,9 @@ static void si_set_border_colors(struct si_context *sctx, unsigned count, } if (border_color_table) { - struct si_pm4_state *pm4 = si_pm4_alloc_state(sctx); + struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state); - uint64_t va_offset = - r600_resource_va(&sctx->screen->b.b, - (void*)sctx->border_color_table); + uint64_t va_offset = sctx->border_color_table->gpu_address; si_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, va_offset >> 8); if (sctx->b.chip_class >= CIK) @@ -2751,16 +2677,18 @@ static void si_bind_sampler_states(struct pipe_context *ctx, unsigned shader, static void si_set_sample_mask(struct pipe_context *ctx, unsigned sample_mask) { struct si_context *sctx = (struct si_context *)ctx; - struct si_pm4_state *pm4 = si_pm4_alloc_state(sctx); + struct si_state_sample_mask *state = CALLOC_STRUCT(si_state_sample_mask); + struct si_pm4_state *pm4 = &state->pm4; uint16_t mask = sample_mask; - if (pm4 == NULL) + if (state == NULL) return; + state->sample_mask = mask; si_pm4_set_reg(pm4, R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, mask | (mask << 16)); si_pm4_set_reg(pm4, R_028C3C_PA_SC_AA_MASK_X0Y1_X1Y1, mask | (mask << 16)); - si_pm4_set_state(sctx, sample_mask, pm4); + si_pm4_set_state(sctx, sample_mask, state); } static void si_delete_sampler_state(struct pipe_context *ctx, void *state) @@ -2800,6 +2728,7 @@ static void *si_create_vertex_elements(struct pipe_context *ctx, S_008F0C_DST_SEL_W(si_map_swizzle(desc->swizzle[3])) | S_008F0C_NUM_FORMAT(num_format) | S_008F0C_DATA_FORMAT(data_format); + v->format_size[i] = desc->block.bits / 8; } memcpy(v->elements, elements, sizeof(struct pipe_vertex_element) * count); @@ -2812,6 +2741,7 @@ static void si_bind_vertex_elements(struct pipe_context *ctx, void *state) struct si_vertex_element *v = (struct si_vertex_element*)state; sctx->vertex_elements = v; + sctx->vertex_buffers_dirty = true; } static void si_delete_vertex_element(struct pipe_context *ctx, void *state) @@ -2823,12 +2753,31 @@ static void si_delete_vertex_element(struct pipe_context *ctx, void *state) FREE(state); } -static void si_set_vertex_buffers(struct pipe_context *ctx, unsigned start_slot, unsigned count, +static void si_set_vertex_buffers(struct pipe_context *ctx, + unsigned start_slot, unsigned count, const struct pipe_vertex_buffer *buffers) { struct si_context *sctx = (struct si_context *)ctx; + struct pipe_vertex_buffer *dst = sctx->vertex_buffer + start_slot; + int i; + + assert(start_slot + count <= Elements(sctx->vertex_buffer)); - util_set_vertex_buffers_count(sctx->vertex_buffer, &sctx->nr_vertex_buffers, buffers, start_slot, count); + if (buffers) { + for (i = 0; i < count; i++) { + const struct pipe_vertex_buffer *src = buffers + i; + struct pipe_vertex_buffer *dsti = dst + i; + + pipe_resource_reference(&dsti->buffer, src->buffer); + dsti->buffer_offset = src->buffer_offset; + dsti->stride = src->stride; + } + } else { + for (i = 0; i < count; i++) { + pipe_resource_reference(&dst[i].buffer, NULL); + } + } + sctx->vertex_buffers_dirty = true; } static void si_set_index_buffer(struct pipe_context *ctx, @@ -2850,14 +2799,65 @@ static void si_set_index_buffer(struct pipe_context *ctx, static void si_set_polygon_stipple(struct pipe_context *ctx, const struct pipe_poly_stipple *state) { + struct si_context *sctx = (struct si_context *)ctx; + struct pipe_resource *tex; + struct pipe_sampler_view *view; + bool is_zero = true; + bool is_one = true; + int i; + + /* The hardware obeys 0 and 1 swizzles in the descriptor even if + * the resource is NULL/invalid. Take advantage of this fact and skip + * texture allocation if the stipple pattern is constant. + * + * This is an optimization for the common case when stippling isn't + * used but set_polygon_stipple is still called by st/mesa. + */ + for (i = 0; i < Elements(state->stipple); i++) { + is_zero = is_zero && state->stipple[i] == 0; + is_one = is_one && state->stipple[i] == 0xffffffff; + } + + if (is_zero || is_one) { + struct pipe_sampler_view templ = {{0}}; + + templ.swizzle_r = PIPE_SWIZZLE_ZERO; + templ.swizzle_g = PIPE_SWIZZLE_ZERO; + templ.swizzle_b = PIPE_SWIZZLE_ZERO; + /* The pattern should be inverted in the texture. */ + templ.swizzle_a = is_zero ? PIPE_SWIZZLE_ONE : PIPE_SWIZZLE_ZERO; + + view = ctx->create_sampler_view(ctx, NULL, &templ); + } else { + /* Create a new texture. */ + tex = util_pstipple_create_stipple_texture(ctx, state->stipple); + if (!tex) + return; + + view = util_pstipple_create_sampler_view(ctx, tex); + pipe_resource_reference(&tex, NULL); + } + + ctx->set_sampler_views(ctx, PIPE_SHADER_FRAGMENT, + SI_POLY_STIPPLE_SAMPLER, 1, &view); + pipe_sampler_view_reference(&view, NULL); + + /* Bind the sampler state if needed. */ + if (!sctx->pstipple_sampler_state) { + sctx->pstipple_sampler_state = util_pstipple_create_sampler(ctx); + ctx->bind_sampler_states(ctx, PIPE_SHADER_FRAGMENT, + SI_POLY_STIPPLE_SAMPLER, 1, + &sctx->pstipple_sampler_state); + } } static void si_texture_barrier(struct pipe_context *ctx) { struct si_context *sctx = (struct si_context *)ctx; - sctx->b.flags |= R600_CONTEXT_INV_TEX_CACHE | - R600_CONTEXT_FLUSH_AND_INV_CB; + sctx->b.flags |= SI_CONTEXT_INV_TC_L1 | + SI_CONTEXT_INV_TC_L2 | + SI_CONTEXT_FLUSH_AND_INV_CB; } static void *si_create_blend_custom(struct si_context *sctx, unsigned mode) @@ -2870,13 +2870,6 @@ static void *si_create_blend_custom(struct si_context *sctx, unsigned mode) return si_create_blend_state_mode(&sctx->b.b, &blend, mode); } -static void si_set_occlusion_query_state(struct pipe_context *ctx, bool enable) -{ - /* XXX Turn this into a proper state. Right now the queries are - * enabled in draw_vbo, which snoops r600_common_context to see - * if any occlusion queries are active. */ -} - static void si_need_gfx_cs_space(struct pipe_context *ctx, unsigned num_dw, bool include_draw_vbo) { @@ -2885,9 +2878,9 @@ static void si_need_gfx_cs_space(struct pipe_context *ctx, unsigned num_dw, void si_init_state_functions(struct si_context *sctx) { - int i; - si_init_atom(&sctx->framebuffer.atom, &sctx->atoms.s.framebuffer, si_emit_framebuffer_state, 0); + si_init_atom(&sctx->db_render_state, &sctx->atoms.s.db_render_state, si_emit_db_render_state, 10); + si_init_atom(&sctx->clip_regs, &sctx->atoms.s.clip_regs, si_emit_clip_regs, 6); sctx->b.b.create_blend_state = si_create_blend_state; sctx->b.b.bind_blend_state = si_bind_blend_state; @@ -2902,12 +2895,7 @@ void si_init_state_functions(struct si_context *sctx) sctx->b.b.bind_depth_stencil_alpha_state = si_bind_dsa_state; sctx->b.b.delete_depth_stencil_alpha_state = si_delete_dsa_state; - for (i = 0; i < 8; i++) { - sctx->custom_dsa_flush_depth_stencil[i] = si_create_db_flush_dsa(sctx, true, true, i); - sctx->custom_dsa_flush_depth[i] = si_create_db_flush_dsa(sctx, true, false, i); - sctx->custom_dsa_flush_stencil[i] = si_create_db_flush_dsa(sctx, false, true, i); - } - sctx->custom_dsa_flush_inplace = si_create_db_flush_dsa(sctx, false, false, 0); + sctx->custom_dsa_flush = si_create_db_flush_dsa(sctx); sctx->custom_blend_resolve = si_create_blend_custom(sctx, V_028808_CB_RESOLVE); sctx->custom_blend_decompress = si_create_blend_custom(sctx, V_028808_CB_FMASK_DECOMPRESS); sctx->custom_blend_fastclear = si_create_blend_custom(sctx, V_028808_CB_ELIMINATE_FAST_CLEAR); @@ -2920,17 +2908,6 @@ void si_init_state_functions(struct si_context *sctx) sctx->b.b.set_framebuffer_state = si_set_framebuffer_state; sctx->b.b.get_sample_position = cayman_get_sample_position; - sctx->b.b.create_vs_state = si_create_vs_state; - sctx->b.b.create_fs_state = si_create_fs_state; - sctx->b.b.bind_vs_state = si_bind_vs_shader; - sctx->b.b.bind_fs_state = si_bind_ps_shader; - sctx->b.b.delete_vs_state = si_delete_vs_shader; - sctx->b.b.delete_fs_state = si_delete_ps_shader; - - sctx->b.b.create_gs_state = si_create_gs_state; - sctx->b.b.bind_gs_state = si_bind_gs_shader; - sctx->b.b.delete_gs_state = si_delete_gs_shader; - sctx->b.b.create_sampler_state = si_create_sampler_state; sctx->b.b.bind_sampler_states = si_bind_sampler_states; sctx->b.b.delete_sampler_state = si_delete_sampler_state; @@ -2957,9 +2934,113 @@ void si_init_state_functions(struct si_context *sctx) sctx->b.b.draw_vbo = si_draw_vbo; } +static void +si_write_harvested_raster_configs(struct si_context *sctx, + struct si_pm4_state *pm4, + unsigned raster_config) +{ + unsigned sh_per_se = MAX2(sctx->screen->b.info.max_sh_per_se, 1); + unsigned num_se = MAX2(sctx->screen->b.info.max_se, 1); + unsigned rb_mask = sctx->screen->b.info.si_backend_enabled_mask; + unsigned num_rb = sctx->screen->b.info.r600_num_backends; + unsigned rb_per_pkr = num_rb / num_se / sh_per_se; + unsigned rb_per_se = num_rb / num_se; + unsigned se0_mask = (1 << rb_per_se) - 1; + unsigned se1_mask = se0_mask << rb_per_se; + unsigned se; + + assert(num_se == 1 || num_se == 2); + assert(sh_per_se == 1 || sh_per_se == 2); + assert(rb_per_pkr == 1 || rb_per_pkr == 2); + + /* XXX: I can't figure out what the *_XSEL and *_YSEL + * fields are for, so I'm leaving them as their default + * values. */ + + se0_mask &= rb_mask; + se1_mask &= rb_mask; + if (num_se == 2 && (!se0_mask || !se1_mask)) { + raster_config &= C_028350_SE_MAP; + + if (!se0_mask) { + raster_config |= + S_028350_SE_MAP(V_028350_RASTER_CONFIG_SE_MAP_3); + } else { + raster_config |= + S_028350_SE_MAP(V_028350_RASTER_CONFIG_SE_MAP_0); + } + } + + for (se = 0; se < num_se; se++) { + unsigned raster_config_se = raster_config; + unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se); + unsigned pkr1_mask = pkr0_mask << rb_per_pkr; + + pkr0_mask &= rb_mask; + pkr1_mask &= rb_mask; + if (sh_per_se == 2 && (!pkr0_mask || !pkr1_mask)) { + raster_config_se &= C_028350_PKR_MAP; + + if (!pkr0_mask) { + raster_config_se |= + S_028350_PKR_MAP(V_028350_RASTER_CONFIG_PKR_MAP_3); + } else { + raster_config_se |= + S_028350_PKR_MAP(V_028350_RASTER_CONFIG_PKR_MAP_0); + } + } + + if (rb_per_pkr == 2) { + unsigned rb0_mask = 1 << (se * rb_per_se); + unsigned rb1_mask = rb0_mask << 1; + + rb0_mask &= rb_mask; + rb1_mask &= rb_mask; + if (!rb0_mask || !rb1_mask) { + raster_config_se &= C_028350_RB_MAP_PKR0; + + if (!rb0_mask) { + raster_config_se |= + S_028350_RB_MAP_PKR0(V_028350_RASTER_CONFIG_RB_MAP_3); + } else { + raster_config_se |= + S_028350_RB_MAP_PKR0(V_028350_RASTER_CONFIG_RB_MAP_0); + } + } + + if (sh_per_se == 2) { + rb0_mask = 1 << (se * rb_per_se + rb_per_pkr); + rb1_mask = rb0_mask << 1; + rb0_mask &= rb_mask; + rb1_mask &= rb_mask; + if (!rb0_mask || !rb1_mask) { + raster_config_se &= C_028350_RB_MAP_PKR1; + + if (!rb0_mask) { + raster_config_se |= + S_028350_RB_MAP_PKR1(V_028350_RASTER_CONFIG_RB_MAP_3); + } else { + raster_config_se |= + S_028350_RB_MAP_PKR1(V_028350_RASTER_CONFIG_RB_MAP_0); + } + } + } + } + + si_pm4_set_reg(pm4, GRBM_GFX_INDEX, + SE_INDEX(se) | SH_BROADCAST_WRITES | + INSTANCE_BROADCAST_WRITES); + si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, raster_config_se); + } + + si_pm4_set_reg(pm4, GRBM_GFX_INDEX, + SE_BROADCAST_WRITES | SH_BROADCAST_WRITES | + INSTANCE_BROADCAST_WRITES); +} + void si_init_config(struct si_context *sctx) { - struct si_pm4_state *pm4 = si_pm4_alloc_state(sctx); + struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state); if (pm4 == NULL) return; @@ -2995,13 +3076,7 @@ void si_init_config(struct si_context *sctx) si_pm4_set_reg(pm4, R_028B90_VGT_GS_INSTANCE_CNT, 0); si_pm4_set_reg(pm4, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0); - if (sctx->b.chip_class == SI) { - si_pm4_set_reg(pm4, R_028AA8_IA_MULTI_VGT_PARAM, - S_028AA8_SWITCH_ON_EOP(1) | - S_028AA8_PARTIAL_VS_WAVE_ON(1) | - S_028AA8_PRIMGROUP_SIZE(63)); - } - si_pm4_set_reg(pm4, R_028AB4_VGT_REUSE_OFF, 0x00000000); + si_pm4_set_reg(pm4, R_028AB4_VGT_REUSE_OFF, 0); si_pm4_set_reg(pm4, R_028AB8_VGT_VTX_CNT_EN, 0x0); if (sctx->b.chip_class < CIK) si_pm4_set_reg(pm4, R_008A14_PA_CL_ENHANCE, S_008A14_NUM_CLIP_SEQ(3) | @@ -3016,7 +3091,7 @@ void si_init_config(struct si_context *sctx) switch (sctx->screen->b.family) { case CHIP_BONAIRE: si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x16000012); - si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, 0x00000000); + si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, 0); break; case CHIP_HAWAII: si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x3a00161a); @@ -3029,29 +3104,45 @@ void si_init_config(struct si_context *sctx) case CHIP_MULLINS: /* XXX todo */ default: - si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x00000000); - si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, 0x00000000); + si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0); + si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, 0); break; } } else { + unsigned rb_mask = sctx->screen->b.info.si_backend_enabled_mask; + unsigned num_rb = sctx->screen->b.info.r600_num_backends; + unsigned raster_config; + switch (sctx->screen->b.family) { case CHIP_TAHITI: case CHIP_PITCAIRN: - si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x2a00126a); + raster_config = 0x2a00126a; break; case CHIP_VERDE: - si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x0000124a); + raster_config = 0x0000124a; break; case CHIP_OLAND: - si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x00000082); + raster_config = 0x00000082; break; case CHIP_HAINAN: - si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x00000000); + raster_config = 0; break; default: - si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x00000000); + fprintf(stderr, + "radeonsi: Unknown GPU, using 0 for raster_config\n"); + raster_config = 0; break; } + + /* Always use the default config when all backends are enabled + * (or when we failed to determine the enabled backends). + */ + if (!rb_mask || util_bitcount(rb_mask) >= num_rb) { + si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, + raster_config); + } else { + si_write_harvested_raster_configs(sctx, pm4, raster_config); + } } si_pm4_set_reg(pm4, R_028204_PA_SC_WINDOW_SCISSOR_TL, S_028204_WINDOW_OFFSET_DISABLE(1)); @@ -3064,24 +3155,30 @@ void si_init_config(struct si_context *sctx) si_pm4_set_reg(pm4, R_02820C_PA_SC_CLIPRECT_RULE, 0xFFFF); si_pm4_set_reg(pm4, R_028230_PA_SC_EDGERULE, 0xAAAAAAAA); - si_pm4_set_reg(pm4, R_0282D0_PA_SC_VPORT_ZMIN_0, 0x00000000); - si_pm4_set_reg(pm4, R_0282D4_PA_SC_VPORT_ZMAX_0, 0x3F800000); - si_pm4_set_reg(pm4, R_028818_PA_CL_VTE_CNTL, 0x0000043F); - si_pm4_set_reg(pm4, R_028820_PA_CL_NANINF_CNTL, 0x00000000); - si_pm4_set_reg(pm4, R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, 0x3F800000); - si_pm4_set_reg(pm4, R_028BEC_PA_CL_GB_VERT_DISC_ADJ, 0x3F800000); - si_pm4_set_reg(pm4, R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ, 0x3F800000); - si_pm4_set_reg(pm4, R_028BF4_PA_CL_GB_HORZ_DISC_ADJ, 0x3F800000); - si_pm4_set_reg(pm4, R_028020_DB_DEPTH_BOUNDS_MIN, 0x00000000); - si_pm4_set_reg(pm4, R_028024_DB_DEPTH_BOUNDS_MAX, 0x00000000); - si_pm4_set_reg(pm4, R_028028_DB_STENCIL_CLEAR, 0x00000000); - si_pm4_set_reg(pm4, R_02802C_DB_DEPTH_CLEAR, 0x3F800000); + /* PA_SU_HARDWARE_SCREEN_OFFSET must be 0 due to hw bug on SI */ + si_pm4_set_reg(pm4, R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, 0); + si_pm4_set_reg(pm4, R_0282D0_PA_SC_VPORT_ZMIN_0, 0); + si_pm4_set_reg(pm4, R_0282D4_PA_SC_VPORT_ZMAX_0, fui(1.0)); + si_pm4_set_reg(pm4, R_028820_PA_CL_NANINF_CNTL, 0); + si_pm4_set_reg(pm4, R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, fui(1.0)); + si_pm4_set_reg(pm4, R_028BEC_PA_CL_GB_VERT_DISC_ADJ, fui(1.0)); + si_pm4_set_reg(pm4, R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ, fui(1.0)); + si_pm4_set_reg(pm4, R_028BF4_PA_CL_GB_HORZ_DISC_ADJ, fui(1.0)); + si_pm4_set_reg(pm4, R_028020_DB_DEPTH_BOUNDS_MIN, 0); + si_pm4_set_reg(pm4, R_028024_DB_DEPTH_BOUNDS_MAX, 0); + si_pm4_set_reg(pm4, R_028028_DB_STENCIL_CLEAR, 0); si_pm4_set_reg(pm4, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0); si_pm4_set_reg(pm4, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0); si_pm4_set_reg(pm4, R_028AC8_DB_PRELOAD_CONTROL, 0x0); + + /* There is a hang if stencil is used and fast stencil is enabled + * regardless of whether HTILE is depth-only or not. + */ si_pm4_set_reg(pm4, R_02800C_DB_RENDER_OVERRIDE, S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) | - S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE)); + S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE) | + S_02800C_FAST_STENCIL_DISABLE(1)); + si_pm4_set_reg(pm4, R_028400_VGT_MAX_VTX_INDX, ~0); si_pm4_set_reg(pm4, R_028404_VGT_MIN_VTX_INDX, 0); si_pm4_set_reg(pm4, R_028408_VGT_INDX_OFFSET, 0); @@ -3092,5 +3189,5 @@ void si_init_config(struct si_context *sctx) si_pm4_set_reg(pm4, R_00B01C_SPI_SHADER_PGM_RSRC3_PS, S_00B01C_CU_EN(0xffff)); } - si_pm4_set_state(sctx, init, pm4); + sctx->init_config = pm4; }