GLES3.2, GLSL ES 3.2 -- all DONE: i965/gen9+
GL_EXT_color_buffer_float DONE (all drivers)
- GL_KHR_blend_equation_advanced DONE (i965, nvc0)
+ GL_KHR_blend_equation_advanced DONE (i965, nvc0, radeonsi)
GL_KHR_debug DONE (all drivers)
GL_KHR_robustness DONE (i965, nvc0, radeonsi)
GL_KHR_texture_compression_astc_ldr DONE (freedreno, i965/gen9+)
<li>GL_EXT_semaphore_fd on radeonsi</li>
<li>GL_EXT_shader_framebuffer_fetch on i965 on desktop GL (GLES was already supported)</li>
<li>GL_EXT_shader_framebuffer_fetch_non_coherent on i965</li>
+<li>GL_KHR_blend_equation_advanced on radeonsi</li>
<li>Disk shader cache support for i965 enabled by default</li>
</ul>
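For reference, a minimal sketch (not part of this patch) of how an application
exercises the new extension; GL_MULTIPLY_KHR, glBlendBarrierKHR and the
blend_support_multiply layout qualifier come from the KHR_blend_equation_advanced
spec, and the program object passed in is assumed to be built from the shader
string below:

    #define GL_GLEXT_PROTOTYPES
    #include <GLES3/gl31.h>
    #include <GLES2/gl2ext.h> /* GL_MULTIPLY_KHR, glBlendBarrierKHR */

    static const char *fs_source =
        "#version 310 es\n"
        "#extension GL_KHR_blend_equation_advanced : require\n"
        "precision mediump float;\n"
        "layout(blend_support_multiply) out;\n" /* advertise the blend mode */
        "layout(location = 0) out vec4 color;\n"
        "void main() { color = vec4(0.5, 0.25, 0.1, 1.0); }\n";

    static void draw_multiply_blended(GLuint prog)
    {
        glEnable(GL_BLEND);
        glBlendEquation(GL_MULTIPLY_KHR); /* advanced blend equation */
        glUseProgram(prog);
        glDrawArrays(GL_TRIANGLES, 0, 3);
        /* Required between overlapping draws unless the _coherent
         * variant of the extension is enabled. */
        glBlendBarrierKHR();
    }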
si_decompress_resident_images(sctx);
}
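+ /* FBFETCH reads color buffer 0 as a shader image, so it must be
+ * decompressed (fast clears resolved) before drawing.
+ */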
+ if (sctx->ps_uses_fbfetch) {
+ struct pipe_surface *cb0 = sctx->framebuffer.state.cbufs[0];
+ si_decompress_color_texture(sctx,
+ (struct r600_texture*)cb0->texture,
+ cb0->u.tex.first_layer,
+ cb0->u.tex.last_layer);
+ }
+
si_check_render_feedback(sctx);
}
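+/* If fmask_desc is non-NULL, the FMASK descriptor for MSAA textures is also
+ * written (at fmask_desc) and MSAA image access is allowed; this is used for
+ * the FBFETCH color buffer 0 descriptor.
+ */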
static void si_set_shader_image_desc(struct si_context *ctx,
const struct pipe_image_view *view,
bool skip_decompress,
- uint32_t *desc)
+ uint32_t *desc, uint32_t *fmask_desc)
{
struct si_screen *screen = ctx->screen;
struct r600_resource *res;
* Note that DCC_DECOMPRESS for MSAA doesn't work in some cases,
* so we don't want to trigger it.
*/
- if (tex->is_depth || tex->resource.b.b.nr_samples >= 2) {
+ if (tex->is_depth ||
+ (!fmask_desc && tex->fmask.size != 0)) {
assert(!"Z/S and MSAA image stores are not supported");
access &= ~PIPE_IMAGE_ACCESS_WRITE;
}
assert(!tex->is_depth);
- assert(tex->fmask.size == 0);
+ assert(fmask_desc || tex->fmask.size == 0);
if (uses_dcc && !skip_decompress &&
(view->access & PIPE_IMAGE_ACCESS_WRITE ||
view->u.tex.first_layer,
view->u.tex.last_layer,
width, height, depth,
- desc, NULL);
+ desc, fmask_desc);
si_set_mutable_tex_desc_fields(screen, tex,
&tex->surface.u.legacy.level[level],
level, level,
if (&images->views[slot] != view)
util_copy_image_view(&images->views[slot], view);
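+ /* Regular shader image slots pass no fmask_desc; MSAA image stores
+ * are not supported there. Only the FBFETCH color buffer descriptor
+ * needs the FMASK part.
+ */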
- si_set_shader_image_desc(ctx, view, skip_decompress, desc);
+ si_set_shader_image_desc(ctx, view, skip_decompress, desc, NULL);
if (res->b.b.target == PIPE_BUFFER) {
images->needs_color_decompress_mask &= ~(1 << slot);
}
}
+void si_update_ps_colorbuf0_slot(struct si_context *sctx)
+{
+ struct si_buffer_resources *buffers = &sctx->rw_buffers;
+ struct si_descriptors *descs = &sctx->descriptors[SI_DESCS_RW_BUFFERS];
+ unsigned slot = SI_PS_IMAGE_COLORBUF0;
+ struct pipe_surface *surf = NULL;
+
+ /* si_texture_disable_dcc can get us here again. */
+ if (sctx->blitter->running)
+ return;
+
+ /* See whether FBFETCH is used and color buffer 0 is set. */
+ if (sctx->ps_shader.cso &&
+ sctx->ps_shader.cso->info.opcode_count[TGSI_OPCODE_FBFETCH] &&
+ sctx->framebuffer.state.nr_cbufs &&
+ sctx->framebuffer.state.cbufs[0])
+ surf = sctx->framebuffer.state.cbufs[0];
+
+ /* Return early if FBFETCH was disabled and remains disabled. */
+ if (!buffers->buffers[slot] && !surf)
+ return;
+
+ sctx->ps_uses_fbfetch = surf != NULL;
+ si_update_ps_iter_samples(sctx);
+
+ if (surf) {
+ struct r600_texture *tex = (struct r600_texture*)surf->texture;
+ struct pipe_image_view view;
+
+ assert(tex);
+ assert(!tex->is_depth);
+
+ /* Disable DCC, because the texture is read as an image while it is
+ * simultaneously bound as the color buffer.
+ */
+ si_texture_disable_dcc(&sctx->b, tex);
+
+ if (tex->resource.b.b.nr_samples <= 1 && tex->cmask_buffer) {
+ /* Disable CMASK: image loads don't understand fast-clear metadata,
+ * so flush fast clears and drop CMASK for single-sample textures.
+ */
+ assert(tex->cmask_buffer != &tex->resource);
+ si_eliminate_fast_color_clear(&sctx->b, tex);
+ si_texture_discard_cmask(sctx->screen, tex);
+ }
+
+ view.resource = surf->texture;
+ view.format = surf->format;
+ view.access = PIPE_IMAGE_ACCESS_READ;
+ view.u.tex.first_layer = surf->u.tex.first_layer;
+ view.u.tex.last_layer = surf->u.tex.last_layer;
+ view.u.tex.level = surf->u.tex.level;
+
+ /* Set the descriptor. */
+ uint32_t *desc = descs->list + slot*4;
+ memset(desc, 0, 16 * 4);
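+ /* The image descriptor fills the first 8 dwords; the FMASK descriptor
+ * for MSAA fetches follows at desc + 8 (the COLORBUF0_FMASK slots).
+ */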
+ si_set_shader_image_desc(sctx, &view, true, desc, desc + 8);
+
+ pipe_resource_reference(&buffers->buffers[slot], &tex->resource.b.b);
+ radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
+ &tex->resource, RADEON_USAGE_READ,
+ RADEON_PRIO_SHADER_RW_IMAGE);
+ buffers->enabled_mask |= 1u << slot;
+ } else {
+ /* Clear the 8-dword image descriptor; the FMASK half is only read
+ * when FBFETCH is enabled with MSAA, so it can stay stale.
+ */
+ memset(descs->list + slot*4, 0, 8*4);
+ pipe_resource_reference(&buffers->buffers[slot], NULL);
+ buffers->enabled_mask &= ~(1u << slot);
+ }
+
+ sctx->descriptors_dirty |= 1u << SI_DESCS_RW_BUFFERS;
+}
+
/* SAMPLER STATES */
static void si_bind_sampler_states(struct pipe_context *ctx,
memcpy(desc_list, desc->list + desc_slot_offset,
sizeof(desc_list));
si_set_shader_image_desc(sctx, view, true,
- desc->list + desc_slot_offset);
+ desc->list + desc_slot_offset, NULL);
if (memcmp(desc_list, desc->list + desc_slot_offset,
sizeof(desc_list))) {
}
si_update_all_resident_texture_descriptors(sctx);
+ si_update_ps_colorbuf0_slot(sctx);
}
/* SHADER USER DATA */
memset(desc_list, 0, sizeof(desc_list));
si_init_descriptor_list(&desc_list[0], 8, 1, null_image_descriptor);
- si_set_shader_image_desc(sctx, view, false, &desc_list[0]);
+ si_set_shader_image_desc(sctx, view, false, &desc_list[0], NULL);
img_handle->desc_slot = si_create_bindless_descriptor(sctx, desc_list,
sizeof(desc_list));
case PIPE_CAP_TGSI_ANY_REG_AS_ADDRESS:
case PIPE_CAP_SIGNED_VERTEX_BUFFER_OFFSET:
case PIPE_CAP_TGSI_VOTE:
+ case PIPE_CAP_TGSI_FS_FBFETCH:
return 1;
case PIPE_CAP_TGSI_BALLOT:
case PIPE_CAP_VERTEXID_NOBASE:
case PIPE_CAP_PRIMITIVE_RESTART_FOR_PATCHES:
case PIPE_CAP_MAX_WINDOW_RECTANGLES:
- case PIPE_CAP_TGSI_FS_FBFETCH:
case PIPE_CAP_TGSI_MUL_ZERO_WINS:
case PIPE_CAP_UMA:
case PIPE_CAP_POLYGON_MODE_FILL_RECTANGLE:
/* MSAA config state. */
int ps_iter_samples;
+ bool ps_uses_fbfetch;
bool smoothing_enabled;
/* DB render state. */
return tex->tc_compatible_htile && level == 0;
}
+static inline unsigned si_get_ps_iter_samples(struct si_context *sctx)
+{
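+ /* FBFETCH forces per-sample shading so that each sample reads its
+ * own color; otherwise honor the current min-sample-shading setting.
+ */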
+ if (sctx->ps_uses_fbfetch)
+ return sctx->framebuffer.nr_samples;
+
+ return sctx->ps_iter_samples;
+}
+
#endif
si_llvm_load_input_fs(ctx, input_index, out);
}
-static LLVMValueRef get_sample_id(struct si_shader_context *ctx)
+LLVMValueRef si_get_sample_id(struct si_shader_context *ctx)
{
return si_unpack_param(ctx, SI_PARAM_ANCILLARY, 8, 4);
}
break;
case TGSI_SEMANTIC_SAMPLEID:
- value = get_sample_id(ctx);
+ value = si_get_sample_id(ctx);
break;
case TGSI_SEMANTIC_SAMPLEPOS: {
unsigned vs_export_prim_id:1;
struct {
unsigned interpolate_at_sample_force_center:1;
+ unsigned fbfetch_msaa:1;
+ unsigned fbfetch_is_1D:1;
+ unsigned fbfetch_layered:1;
} ps;
} u;
} mono;
LLVMValueRef si_get_bounded_indirect_index(struct si_shader_context *ctx,
const struct tgsi_ind_register *ind,
int rel_index, unsigned num);
+LLVMValueRef si_get_sample_id(struct si_shader_context *ctx);
void si_shader_context_init_alu(struct lp_build_tgsi_context *bld_base);
void si_shader_context_init_mem(struct si_shader_context *ctx);
emit_data->output[emit_data->chan] = samples;
}
+static void si_llvm_emit_fbfetch(const struct lp_build_tgsi_action *action,
+ struct lp_build_tgsi_context *bld_base,
+ struct lp_build_emit_data *emit_data)
+{
+ struct si_shader_context *ctx = si_shader_context(bld_base);
+ struct ac_image_args args = {};
+ LLVMValueRef ptr, image, fmask, addr_vec;
+
+ /* Ignore src0, because KHR_blend_equation_advanced restricts rendering
+ * to a single render target, so only color buffer 0 can be fetched.
+ */
+
+ /* Load the image descriptor. */
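+ /* The RW buffer list is indexed as an array of v8i32 elements (two
+ * 4-dword slots each), so the slot must be even and the load index
+ * is slot / 2.
+ */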
+ STATIC_ASSERT(SI_PS_IMAGE_COLORBUF0 % 2 == 0);
+ ptr = LLVMGetParam(ctx->main_fn, ctx->param_rw_buffers);
+ ptr = LLVMBuildPointerCast(ctx->ac.builder, ptr,
+ ac_array_in_const32_addr_space(ctx->v8i32), "");
+ image = ac_build_load_to_sgpr(&ctx->ac, ptr,
+ LLVMConstInt(ctx->i32, SI_PS_IMAGE_COLORBUF0 / 2, 0));
+
+ LLVMValueRef addr[4];
+ unsigned chan = 0;
+
+ addr[chan++] = si_unpack_param(ctx, SI_PARAM_POS_FIXED_PT, 0, 16);
+
+ if (!ctx->shader->key.mono.u.ps.fbfetch_is_1D)
+ addr[chan++] = si_unpack_param(ctx, SI_PARAM_POS_FIXED_PT, 16, 16);
+
+ /* Get the current render target layer index. */
+ if (ctx->shader->key.mono.u.ps.fbfetch_layered)
+ addr[chan++] = si_unpack_param(ctx, SI_PARAM_ANCILLARY, 16, 11);
+
+ if (ctx->shader->key.mono.u.ps.fbfetch_msaa)
+ addr[chan++] = si_get_sample_id(ctx);
+
+ while (chan < 4)
+ addr[chan++] = LLVMGetUndef(ctx->i32);
+
+ if (ctx->shader->key.mono.u.ps.fbfetch_msaa) {
+ fmask = ac_build_load_to_sgpr(&ctx->ac, ptr,
+ LLVMConstInt(ctx->i32, SI_PS_IMAGE_COLORBUF0_FMASK / 2, 0));
+
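+ /* Map the sample index through FMASK to the physical fragment
+ * index of the compressed MSAA surface.
+ */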
+ ac_apply_fmask_to_sample(&ctx->ac, fmask, addr, false);
+ }
+
+ addr_vec = ac_build_gather_values(&ctx->ac, addr, ARRAY_SIZE(addr));
+
+ args.opcode = ac_image_load;
+ args.resource = image;
+ args.addr = addr_vec;
+ args.dmask = 0xf;
+ args.da = ctx->shader->key.mono.u.ps.fbfetch_layered;
+
+ emit_data->output[emit_data->chan] =
+ ac_build_image_opcode(&ctx->ac, &args);
+}
+
static const struct lp_build_tgsi_action tex_action = {
.fetch_args = tex_fetch_args,
.emit = build_tex_intrinsic,
bld_base->op_actions[TGSI_OPCODE_LODQ] = tex_action;
bld_base->op_actions[TGSI_OPCODE_TXQS].emit = si_llvm_emit_txqs;
+ bld_base->op_actions[TGSI_OPCODE_FBFETCH].emit = si_llvm_emit_fbfetch;
+
bld_base->op_actions[TGSI_OPCODE_LOAD].fetch_args = load_fetch_args;
bld_base->op_actions[TGSI_OPCODE_LOAD].emit = load_emit;
bld_base->op_actions[TGSI_OPCODE_STORE].fetch_args = store_fetch_args;
si_context_add_resource_size(ctx, surf->base.texture);
}
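+ /* Color buffer 0 may have changed, so the FBFETCH image descriptor
+ * has to be refreshed.
+ */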
+ si_update_ps_colorbuf0_slot(sctx);
si_update_poly_offset_state(sctx);
si_mark_atom_dirty(sctx, &sctx->cb_render_state);
si_mark_atom_dirty(sctx, &sctx->framebuffer.atom);
8, /* 16x MSAA */
};
unsigned log_samples = util_logbase2(setup_samples);
+ unsigned ps_iter_samples = si_get_ps_iter_samples(sctx);
unsigned log_ps_iter_samples =
- util_logbase2(util_next_power_of_two(sctx->ps_iter_samples));
+ util_logbase2(util_next_power_of_two(ps_iter_samples));
radeon_set_context_reg_seq(cs, R_028BDC_PA_SC_LINE_CNTL, 2);
radeon_emit(cs, sc_line_cntl |
S_028804_HIGH_QUALITY_INTERSECTIONS(1) |
S_028804_STATIC_ANCHOR_ASSOCIATIONS(1));
radeon_set_context_reg(cs, R_028A4C_PA_SC_MODE_CNTL_1,
- S_028A4C_PS_ITER_SAMPLE(sctx->ps_iter_samples > 1) |
+ S_028A4C_PS_ITER_SAMPLE(ps_iter_samples > 1) |
sc_mode_cntl_1);
} else if (sctx->smoothing_enabled) {
radeon_set_context_reg(cs, R_028804_DB_EQAA,
}
}
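+/* Re-emit the states that depend on the effective sample count returned
+ * by si_get_ps_iter_samples, which changes when FBFETCH is toggled.
+ */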
+void si_update_ps_iter_samples(struct si_context *sctx)
+{
+ if (sctx->framebuffer.nr_samples > 1)
+ si_mark_atom_dirty(sctx, &sctx->msaa_config);
+ if (sctx->screen->dpbb_allowed)
+ si_mark_atom_dirty(sctx, &sctx->dpbb_state);
+}
+
static void si_set_min_samples(struct pipe_context *ctx, unsigned min_samples)
{
struct si_context *sctx = (struct si_context *)ctx;
sctx->ps_iter_samples = min_samples;
sctx->do_update_shaders = true;
- if (sctx->framebuffer.nr_samples > 1)
- si_mark_atom_dirty(sctx, &sctx->msaa_config);
- if (sctx->screen->dpbb_allowed)
- si_mark_atom_dirty(sctx, &sctx->dpbb_state);
+ si_update_ps_iter_samples(sctx);
}
/*
SI_PS_CONST_POLY_STIPPLE,
SI_PS_CONST_SAMPLE_POSITIONS,
+ /* Image descriptor of color buffer 0 for KHR_blend_equation_advanced. */
+ SI_PS_IMAGE_COLORBUF0,
+ SI_PS_IMAGE_COLORBUF0_HI,
+ SI_PS_IMAGE_COLORBUF0_FMASK,
+ SI_PS_IMAGE_COLORBUF0_FMASK_HI,
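+ /* Each value above is one 4-dword slot; the image and FMASK
+ * descriptors are 8 dwords each, hence the _HI entries. */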
+
SI_NUM_RW_BUFFERS,
};
unsigned base_level, unsigned first_level,
unsigned block_width, bool is_stencil,
uint32_t *state);
+void si_update_ps_colorbuf0_slot(struct si_context *sctx);
void si_get_pipe_constant_buffer(struct si_context *sctx, uint shader,
uint slot, struct pipe_constant_buffer *cbuf);
void si_get_shader_buffers(struct si_context *sctx,
unsigned width0, unsigned height0,
unsigned force_level);
void si_update_fb_dirtiness_after_rendering(struct si_context *sctx);
+void si_update_ps_iter_samples(struct si_context *sctx);
/* si_state_binning.c */
void si_emit_dpbb_state(struct si_context *sctx, struct r600_atom *state);
/* Multiply the sum by some function of the number of samples. */
if (nr_samples >= 2) {
- if (sctx->ps_iter_samples >= 2)
+ if (si_get_ps_iter_samples(sctx) >= 2)
sum *= nr_samples;
else
sum *= 2;
}
key->part.ps.epilog.alpha_func = si_get_alpha_test_func(sctx);
+
+ /* ps_uses_fbfetch is true only if the color buffer is bound. */
+ if (sctx->ps_uses_fbfetch) {
+ struct pipe_surface *cb0 = sctx->framebuffer.state.cbufs[0];
+ struct pipe_resource *tex = cb0->texture;
+
+ /* 1D textures are allocated and used as 2D on GFX9. */
+ key->mono.u.ps.fbfetch_msaa = sctx->framebuffer.nr_samples > 1;
+ key->mono.u.ps.fbfetch_is_1D = sctx->b.chip_class != GFX9 &&
+ (tex->target == PIPE_TEXTURE_1D ||
+ tex->target == PIPE_TEXTURE_1D_ARRAY);
+ key->mono.u.ps.fbfetch_layered = tex->target == PIPE_TEXTURE_1D_ARRAY ||
+ tex->target == PIPE_TEXTURE_2D_ARRAY ||
+ tex->target == PIPE_TEXTURE_CUBE ||
+ tex->target == PIPE_TEXTURE_CUBE_ARRAY ||
+ tex->target == PIPE_TEXTURE_3D;
+ }
break;
}
default:
si_mark_atom_dirty(sctx, &sctx->msaa_config);
}
si_set_active_descriptors_for_shader(sctx, sel);
+ si_update_ps_colorbuf0_slot(sctx);
}
static void si_delete_shader(struct si_context *sctx, struct si_shader *shader)