if (rctx->chip_class == CAYMAN) {
r600_store_context_reg(&rs->buffer, CM_R_028BE4_PA_SU_VTX_CNTL,
- S_028C08_PIX_CENTER_HALF(state->gl_rasterization_rules) |
+ S_028C08_PIX_CENTER_HALF(state->half_pixel_center) |
S_028C08_QUANT_MODE(V_028C08_X_1_256TH));
} else {
r600_store_context_reg(&rs->buffer, R_028C08_PA_SU_VTX_CNTL,
- S_028C08_PIX_CENTER_HALF(state->gl_rasterization_rules) |
+ S_028C08_PIX_CENTER_HALF(state->half_pixel_center) |
S_028C08_QUANT_MODE(V_028C08_X_1_256TH));
}
unsigned swizzle_res;
unsigned char swizzle[4];
const struct util_format_description *desc;
+ unsigned offset = view->base.u.buf.first_element * stride;
+ unsigned size = (view->base.u.buf.last_element - view->base.u.buf.first_element + 1) * stride;
swizzle[0] = view->base.swizzle_r;
swizzle[1] = view->base.swizzle_g;
swizzle_res = r600_get_swizzle_combined(desc->swizzle, swizzle, TRUE);
- va = r600_resource_va(ctx->screen, view->base.texture);
+ va = r600_resource_va(ctx->screen, view->base.texture) + offset;
view->tex_resource = &tmp->resource;
view->skip_mip_address_reloc = true;
view->tex_resource_words[0] = va;
- view->tex_resource_words[1] = width0 - 1;
+ view->tex_resource_words[1] = size - 1;
view->tex_resource_words[2] = S_030008_BASE_ADDRESS_HI(va >> 32UL) |
S_030008_STRIDE(stride) |
S_030008_DATA_FORMAT(format) |
surf->db_htile_data_base = va >> 8;
surf->db_htile_surface = S_028ABC_HTILE_WIDTH(1) |
S_028ABC_HTILE_HEIGHT(1) |
+ S_028ABC_FULL_CACHE(1) |
S_028ABC_LINEAR(1);
surf->db_depth_info |= S_028040_TILE_SURFACE_ENABLE(1);
surf->db_preload_control = 0;
(((s2x) & 0xf) << 16) | (((s2y) & 0xf) << 20) | \
(((s3x) & 0xf) << 24) | (((s3y) & 0xf) << 28))
+/* 2xMSAA
+ * There are two locations (-4, 4), (4, -4).
+ * Each word lists that pair twice, and all four words are identical. */
+static uint32_t sample_locs_2x[] = {
+ FILL_SREG(-4, 4, 4, -4, -4, 4, 4, -4),
+ FILL_SREG(-4, 4, 4, -4, -4, 4, 4, -4),
+ FILL_SREG(-4, 4, 4, -4, -4, 4, 4, -4),
+ FILL_SREG(-4, 4, 4, -4, -4, 4, 4, -4),
+};
+/* Equals the largest absolute coordinate used in the 2x table. */
+static unsigned max_dist_2x = 4;
+/* 4xMSAA
+ * There are 4 locations: (-2, -2), (2, 2), (-6, 6), (6, -6).
+ * One word holds all four locations; the four words are identical. */
+static uint32_t sample_locs_4x[] = {
+ FILL_SREG(-2, -2, 2, 2, -6, 6, 6, -6),
+ FILL_SREG(-2, -2, 2, 2, -6, 6, 6, -6),
+ FILL_SREG(-2, -2, 2, 2, -6, 6, 6, -6),
+ FILL_SREG(-2, -2, 2, 2, -6, 6, 6, -6),
+};
+/* Equals the largest absolute coordinate used in the 4x table. */
+static unsigned max_dist_4x = 6;
+/* 8xMSAA
+ * Eight locations spread over two words: even-indexed words carry
+ * samples 0-3 and odd-indexed words carry samples 4-7. That two-word
+ * pair is repeated four times. */
+static uint32_t sample_locs_8x[] = {
+ FILL_SREG(-1, 1, 1, 5, 3, -5, 5, 3),
+ FILL_SREG(-7, -1, -3, -7, 7, -3, -5, 7),
+ FILL_SREG(-1, 1, 1, 5, 3, -5, 5, 3),
+ FILL_SREG(-7, -1, -3, -7, 7, -3, -5, 7),
+ FILL_SREG(-1, 1, 1, 5, 3, -5, 5, 3),
+ FILL_SREG(-7, -1, -3, -7, 7, -3, -5, 7),
+ FILL_SREG(-1, 1, 1, 5, 3, -5, 5, 3),
+ FILL_SREG(-7, -1, -3, -7, 7, -3, -5, 7),
+};
+/* Equals the largest absolute coordinate used in the 8x table. */
+static unsigned max_dist_8x = 7;
+
+/* Report the position of one MSAA sample as two floats in out_value
+ * (out_value[0] from the x nibble, out_value[1] from the y nibble).
+ *
+ * sample_count selects the location table: 2, 4 and 8 are decoded from
+ * sample_locs_2x / sample_locs_4x / sample_locs_8x; any other count
+ * (including 1) yields (0.5, 0.5). sample_index picks the sample inside
+ * the selected table. ctx is not used.
+ *
+ * NOTE(review): sample_index is not range-checked. In the 2x and 4x cases
+ * an index of 4 or more pushes the shift count to 32 or beyond; confirm
+ * that callers only pass sample_index < sample_count. */
+static void evergreen_get_sample_position(struct pipe_context *ctx,
+ unsigned sample_count,
+ unsigned sample_index,
+ float *out_value)
+{
+ int offset, index;
+ /* Signed 4-bit field: storing the masked nibble here is what turns the
+ * raw values 8..15 back into negative coordinates. ISO C leaves that
+ * conversion implementation-defined, so this relies on the compiler
+ * wrapping the value. */
+ struct {
+ int idx:4;
+ } val;
+ switch (sample_count) {
+ case 1:
+ default:
+ out_value[0] = out_value[1] = 0.5;
+ break;
+ case 2:
+ /* One byte per sample: x in the low nibble, y in the high nibble. */
+ offset = 4 * (sample_index * 2);
+ val.idx = (sample_locs_2x[0] >> offset) & 0xf;
+ /* Adding 8 and dividing by 16 rescales the signed nibble to a
+ * fraction of the pixel, with coordinate 0 landing on 0.5. */
+ out_value[0] = (float)(val.idx + 8) / 16.0f;
+ val.idx = (sample_locs_2x[0] >> (offset + 4)) & 0xf;
+ out_value[1] = (float)(val.idx + 8) / 16.0f;
+ break;
+ case 4:
+ /* Same decoding as 2x; all four samples fit in word 0. */
+ offset = 4 * (sample_index * 2);
+ val.idx = (sample_locs_4x[0] >> offset) & 0xf;
+ out_value[0] = (float)(val.idx + 8) / 16.0f;
+ val.idx = (sample_locs_4x[0] >> (offset + 4)) & 0xf;
+ out_value[1] = (float)(val.idx + 8) / 16.0f;
+ break;
+ case 8:
+ /* Four samples per word: word 0 holds samples 0-3, word 1 holds
+ * samples 4-7. */
+ offset = 4 * (sample_index % 4 * 2);
+ index = (sample_index / 4);
+ val.idx = (sample_locs_8x[index] >> offset) & 0xf;
+ out_value[0] = (float)(val.idx + 8) / 16.0f;
+ val.idx = (sample_locs_8x[index] >> (offset + 4)) & 0xf;
+ out_value[1] = (float)(val.idx + 8) / 16.0f;
+ break;
+ }
+}
+
static void evergreen_emit_msaa_state(struct r600_context *rctx, int nr_samples)
{
- /* 2xMSAA
- * There are two locations (-4, 4), (4, -4). */
- static uint32_t sample_locs_2x[] = {
- FILL_SREG(-4, 4, 4, -4, -4, 4, 4, -4),
- FILL_SREG(-4, 4, 4, -4, -4, 4, 4, -4),
- FILL_SREG(-4, 4, 4, -4, -4, 4, 4, -4),
- FILL_SREG(-4, 4, 4, -4, -4, 4, 4, -4),
- };
- static unsigned max_dist_2x = 4;
- /* 4xMSAA
- * There are 4 locations: (-2, -2), (2, 2), (-6, 6), (6, -6). */
- static uint32_t sample_locs_4x[] = {
- FILL_SREG(-2, -2, 2, 2, -6, 6, 6, -6),
- FILL_SREG(-2, -2, 2, 2, -6, 6, 6, -6),
- FILL_SREG(-2, -2, 2, 2, -6, 6, 6, -6),
- FILL_SREG(-2, -2, 2, 2, -6, 6, 6, -6),
- };
- static unsigned max_dist_4x = 6;
- /* 8xMSAA */
- static uint32_t sample_locs_8x[] = {
- FILL_SREG(-1, 1, 1, 5, 3, -5, 5, 3),
- FILL_SREG(-7, -1, -3, -7, 7, -3, -5, 7),
- FILL_SREG(-1, 1, 1, 5, 3, -5, 5, 3),
- FILL_SREG(-7, -1, -3, -7, 7, -3, -5, 7),
- FILL_SREG(-1, 1, 1, 5, 3, -5, 5, 3),
- FILL_SREG(-7, -1, -3, -7, 7, -3, -5, 7),
- FILL_SREG(-1, 1, 1, 5, 3, -5, 5, 3),
- FILL_SREG(-7, -1, -3, -7, 7, -3, -5, 7),
- };
- static unsigned max_dist_8x = 7;
struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
unsigned max_dist = 0;
}
}
+/* Cayman 8xMSAA
+ * Words 0-3 are four copies of the word for samples 0-3; words 4-7 are
+ * four copies of the word for samples 4-7. */
+static uint32_t cm_sample_locs_8x[] = {
+ FILL_SREG(-2, -5, 3, -4, -1, 5, -6, -2),
+ FILL_SREG(-2, -5, 3, -4, -1, 5, -6, -2),
+ FILL_SREG(-2, -5, 3, -4, -1, 5, -6, -2),
+ FILL_SREG(-2, -5, 3, -4, -1, 5, -6, -2),
+ FILL_SREG( 6, 0, 0, 0, -5, 3, 4, 4),
+ FILL_SREG( 6, 0, 0, 0, -5, 3, 4, 4),
+ FILL_SREG( 6, 0, 0, 0, -5, 3, 4, 4),
+ FILL_SREG( 6, 0, 0, 0, -5, 3, 4, 4),
+};
+/* NOTE(review): the largest absolute coordinate in the table above is 6,
+ * not 8; confirm this value is intended. */
+static unsigned cm_max_dist_8x = 8;
+/* Cayman 16xMSAA
+ * Four distinct words, each repeated four times in a row: words 0-3 hold
+ * samples 0-3, words 4-7 samples 4-7, words 8-11 samples 8-11 and
+ * words 12-15 samples 12-15. */
+static uint32_t cm_sample_locs_16x[] = {
+ FILL_SREG(-7, -3, 7, 3, 1, -5, -5, 5),
+ FILL_SREG(-7, -3, 7, 3, 1, -5, -5, 5),
+ FILL_SREG(-7, -3, 7, 3, 1, -5, -5, 5),
+ FILL_SREG(-7, -3, 7, 3, 1, -5, -5, 5),
+ FILL_SREG(-3, -7, 3, 7, 5, -1, -1, 1),
+ FILL_SREG(-3, -7, 3, 7, 5, -1, -1, 1),
+ FILL_SREG(-3, -7, 3, 7, 5, -1, -1, 1),
+ FILL_SREG(-3, -7, 3, 7, 5, -1, -1, 1),
+ FILL_SREG(-8, -6, 4, 2, 2, -8, -2, 6),
+ FILL_SREG(-8, -6, 4, 2, 2, -8, -2, 6),
+ FILL_SREG(-8, -6, 4, 2, 2, -8, -2, 6),
+ FILL_SREG(-8, -6, 4, 2, 2, -8, -2, 6),
+ FILL_SREG(-4, -2, 0, 4, 6, -4, -6, 0),
+ FILL_SREG(-4, -2, 0, 4, 6, -4, -6, 0),
+ FILL_SREG(-4, -2, 0, 4, 6, -4, -6, 0),
+ FILL_SREG(-4, -2, 0, 4, 6, -4, -6, 0),
+};
+/* Equals the largest absolute coordinate used in the 16x table (-8). */
+static unsigned cm_max_dist_16x = 8;
+/* Cayman variant of the sample-position query: writes the position of one
+ * MSAA sample into out_value[0] (x) and out_value[1] (y).
+ *
+ * Counts 2 and 4 are decoded from sample_locs_2x / sample_locs_4x; counts
+ * 8 and 16 are decoded from cm_sample_locs_8x / cm_sample_locs_16x. Any
+ * other count (including 1) yields (0.5, 0.5). ctx is not used.
+ *
+ * NOTE(review): sample_index is not range-checked. In the 2x and 4x cases
+ * an index of 4 or more pushes the shift count to 32 or beyond; confirm
+ * that callers only pass sample_index < sample_count. */
+static void cayman_get_sample_position(struct pipe_context *ctx,
+ unsigned sample_count,
+ unsigned sample_index,
+ float *out_value)
+{
+ int offset, index;
+ /* Signed 4-bit field: storing the masked nibble here is what turns the
+ * raw values 8..15 back into negative coordinates. ISO C leaves that
+ * conversion implementation-defined, so this relies on the compiler
+ * wrapping the value. */
+ struct {
+ int idx:4;
+ } val;
+ switch (sample_count) {
+ case 1:
+ default:
+ out_value[0] = out_value[1] = 0.5;
+ break;
+ case 2:
+ /* One byte per sample: x in the low nibble, y in the high nibble. */
+ offset = 4 * (sample_index * 2);
+ val.idx = (sample_locs_2x[0] >> offset) & 0xf;
+ /* Adding 8 and dividing by 16 rescales the signed nibble to a
+ * fraction of the pixel, with coordinate 0 landing on 0.5. */
+ out_value[0] = (float)(val.idx + 8) / 16.0f;
+ val.idx = (sample_locs_2x[0] >> (offset + 4)) & 0xf;
+ out_value[1] = (float)(val.idx + 8) / 16.0f;
+ break;
+ case 4:
+ /* Same decoding as 2x; all four samples fit in word 0. */
+ offset = 4 * (sample_index * 2);
+ val.idx = (sample_locs_4x[0] >> offset) & 0xf;
+ out_value[0] = (float)(val.idx + 8) / 16.0f;
+ val.idx = (sample_locs_4x[0] >> (offset + 4)) & 0xf;
+ out_value[1] = (float)(val.idx + 8) / 16.0f;
+ break;
+ case 8:
+ /* The Cayman table repeats each distinct word four times, so the
+ * word for a group of four samples sits at a stride of 4. */
+ offset = 4 * (sample_index % 4 * 2);
+ index = (sample_index / 4) * 4;
+ val.idx = (cm_sample_locs_8x[index] >> offset) & 0xf;
+ out_value[0] = (float)(val.idx + 8) / 16.0f;
+ val.idx = (cm_sample_locs_8x[index] >> (offset + 4)) & 0xf;
+ out_value[1] = (float)(val.idx + 8) / 16.0f;
+ break;
+ case 16:
+ /* Same stride-of-4 lookup as 8x, over the 16-word table. */
+ offset = 4 * (sample_index % 4 * 2);
+ index = (sample_index / 4) * 4;
+ val.idx = (cm_sample_locs_16x[index] >> offset) & 0xf;
+ out_value[0] = (float)(val.idx + 8) / 16.0f;
+ val.idx = (cm_sample_locs_16x[index] >> (offset + 4)) & 0xf;
+ out_value[1] = (float)(val.idx + 8) / 16.0f;
+ break;
+ }
+}
+
static void cayman_emit_msaa_state(struct r600_context *rctx, int nr_samples)
{
- /* 2xMSAA
- * There are two locations (-4, 4), (4, -4). */
- static uint32_t sample_locs_2x[] = {
- FILL_SREG(-4, 4, 4, -4, -4, 4, 4, -4),
- FILL_SREG(-4, 4, 4, -4, -4, 4, 4, -4),
- FILL_SREG(-4, 4, 4, -4, -4, 4, 4, -4),
- FILL_SREG(-4, 4, 4, -4, -4, 4, 4, -4),
- };
- static unsigned max_dist_2x = 4;
- /* 4xMSAA
- * There are 4 locations: (-2, -2), (2, 2), (-6, 6), (6, -6). */
- static uint32_t sample_locs_4x[] = {
- FILL_SREG(-2, -2, 2, 2, -6, 6, 6, -6),
- FILL_SREG(-2, -2, 2, 2, -6, 6, 6, -6),
- FILL_SREG(-2, -2, 2, 2, -6, 6, 6, -6),
- FILL_SREG(-2, -2, 2, 2, -6, 6, 6, -6),
- };
- static unsigned max_dist_4x = 6;
- /* 8xMSAA */
- static uint32_t sample_locs_8x[] = {
- FILL_SREG(-2, -5, 3, -4, -1, 5, -6, -2),
- FILL_SREG(-2, -5, 3, -4, -1, 5, -6, -2),
- FILL_SREG(-2, -5, 3, -4, -1, 5, -6, -2),
- FILL_SREG(-2, -5, 3, -4, -1, 5, -6, -2),
- FILL_SREG( 6, 0, 0, 0, -5, 3, 4, 4),
- FILL_SREG( 6, 0, 0, 0, -5, 3, 4, 4),
- FILL_SREG( 6, 0, 0, 0, -5, 3, 4, 4),
- FILL_SREG( 6, 0, 0, 0, -5, 3, 4, 4),
- };
- static unsigned max_dist_8x = 8;
- /* 16xMSAA */
- static uint32_t sample_locs_16x[] = {
- FILL_SREG(-7, -3, 7, 3, 1, -5, -5, 5),
- FILL_SREG(-7, -3, 7, 3, 1, -5, -5, 5),
- FILL_SREG(-7, -3, 7, 3, 1, -5, -5, 5),
- FILL_SREG(-7, -3, 7, 3, 1, -5, -5, 5),
- FILL_SREG(-3, -7, 3, 7, 5, -1, -1, 1),
- FILL_SREG(-3, -7, 3, 7, 5, -1, -1, 1),
- FILL_SREG(-3, -7, 3, 7, 5, -1, -1, 1),
- FILL_SREG(-3, -7, 3, 7, 5, -1, -1, 1),
- FILL_SREG(-8, -6, 4, 2, 2, -8, -2, 6),
- FILL_SREG(-8, -6, 4, 2, 2, -8, -2, 6),
- FILL_SREG(-8, -6, 4, 2, 2, -8, -2, 6),
- FILL_SREG(-8, -6, 4, 2, 2, -8, -2, 6),
- FILL_SREG(-4, -2, 0, 4, 6, -4, -6, 0),
- FILL_SREG(-4, -2, 0, 4, 6, -4, -6, 0),
- FILL_SREG(-4, -2, 0, 4, 6, -4, -6, 0),
- FILL_SREG(-4, -2, 0, 4, 6, -4, -6, 0),
- };
- static unsigned max_dist_16x = 8;
+
struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
unsigned max_dist = 0;
break;
case 8:
r600_write_context_reg_seq(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 14);
- r600_write_value(cs, sample_locs_8x[0]);
- r600_write_value(cs, sample_locs_8x[4]);
+ r600_write_value(cs, cm_sample_locs_8x[0]);
+ r600_write_value(cs, cm_sample_locs_8x[4]);
r600_write_value(cs, 0);
r600_write_value(cs, 0);
- r600_write_value(cs, sample_locs_8x[1]);
- r600_write_value(cs, sample_locs_8x[5]);
+ r600_write_value(cs, cm_sample_locs_8x[1]);
+ r600_write_value(cs, cm_sample_locs_8x[5]);
r600_write_value(cs, 0);
r600_write_value(cs, 0);
- r600_write_value(cs, sample_locs_8x[2]);
- r600_write_value(cs, sample_locs_8x[6]);
+ r600_write_value(cs, cm_sample_locs_8x[2]);
+ r600_write_value(cs, cm_sample_locs_8x[6]);
r600_write_value(cs, 0);
r600_write_value(cs, 0);
- r600_write_value(cs, sample_locs_8x[3]);
- r600_write_value(cs, sample_locs_8x[7]);
- max_dist = max_dist_8x;
+ r600_write_value(cs, cm_sample_locs_8x[3]);
+ r600_write_value(cs, cm_sample_locs_8x[7]);
+ max_dist = cm_max_dist_8x;
break;
case 16:
r600_write_context_reg_seq(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 16);
- r600_write_value(cs, sample_locs_16x[0]);
- r600_write_value(cs, sample_locs_16x[4]);
- r600_write_value(cs, sample_locs_16x[8]);
- r600_write_value(cs, sample_locs_16x[12]);
- r600_write_value(cs, sample_locs_16x[1]);
- r600_write_value(cs, sample_locs_16x[5]);
- r600_write_value(cs, sample_locs_16x[9]);
- r600_write_value(cs, sample_locs_16x[13]);
- r600_write_value(cs, sample_locs_16x[2]);
- r600_write_value(cs, sample_locs_16x[6]);
- r600_write_value(cs, sample_locs_16x[10]);
- r600_write_value(cs, sample_locs_16x[14]);
- r600_write_value(cs, sample_locs_16x[3]);
- r600_write_value(cs, sample_locs_16x[7]);
- r600_write_value(cs, sample_locs_16x[11]);
- r600_write_value(cs, sample_locs_16x[15]);
- max_dist = max_dist_16x;
+ r600_write_value(cs, cm_sample_locs_16x[0]);
+ r600_write_value(cs, cm_sample_locs_16x[4]);
+ r600_write_value(cs, cm_sample_locs_16x[8]);
+ r600_write_value(cs, cm_sample_locs_16x[12]);
+ r600_write_value(cs, cm_sample_locs_16x[1]);
+ r600_write_value(cs, cm_sample_locs_16x[5]);
+ r600_write_value(cs, cm_sample_locs_16x[9]);
+ r600_write_value(cs, cm_sample_locs_16x[13]);
+ r600_write_value(cs, cm_sample_locs_16x[2]);
+ r600_write_value(cs, cm_sample_locs_16x[6]);
+ r600_write_value(cs, cm_sample_locs_16x[10]);
+ r600_write_value(cs, cm_sample_locs_16x[14]);
+ r600_write_value(cs, cm_sample_locs_16x[3]);
+ r600_write_value(cs, cm_sample_locs_16x[7]);
+ r600_write_value(cs, cm_sample_locs_16x[11]);
+ r600_write_value(cs, cm_sample_locs_16x[15]);
+ max_dist = cm_max_dist_16x;
break;
}
struct r600_constbuf_state *state,
unsigned buffer_id_base,
unsigned reg_alu_constbuf_size,
- unsigned reg_alu_const_cache)
+ unsigned reg_alu_const_cache,
+ unsigned pkt_flags)
{
struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
uint32_t dirty_mask = state->dirty_mask;
va = r600_resource_va(&rctx->screen->screen, &rbuffer->b.b);
va += cb->buffer_offset;
- r600_write_context_reg(cs, reg_alu_constbuf_size + buffer_index * 4,
- ALIGN_DIVUP(cb->buffer_size >> 4, 16));
- r600_write_context_reg(cs, reg_alu_const_cache + buffer_index * 4, va >> 8);
+ r600_write_context_reg_flag(cs, reg_alu_constbuf_size + buffer_index * 4,
+ ALIGN_DIVUP(cb->buffer_size >> 4, 16), pkt_flags);
+ r600_write_context_reg_flag(cs, reg_alu_const_cache + buffer_index * 4, va >> 8,
+ pkt_flags);
- r600_write_value(cs, PKT3(PKT3_NOP, 0, 0));
+ r600_write_value(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags);
r600_write_value(cs, r600_context_bo_reloc(rctx, &rctx->rings.gfx, rbuffer, RADEON_USAGE_READ));
- r600_write_value(cs, PKT3(PKT3_SET_RESOURCE, 8, 0));
+ r600_write_value(cs, PKT3(PKT3_SET_RESOURCE, 8, 0) | pkt_flags);
r600_write_value(cs, (buffer_id_base + buffer_index) * 8);
r600_write_value(cs, va); /* RESOURCEi_WORD0 */
r600_write_value(cs, rbuffer->buf->size - cb->buffer_offset - 1); /* RESOURCEi_WORD1 */
r600_write_value(cs, 0); /* RESOURCEi_WORD6 */
r600_write_value(cs, 0xc0000000); /* RESOURCEi_WORD7 */
- r600_write_value(cs, PKT3(PKT3_NOP, 0, 0));
+ r600_write_value(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags);
r600_write_value(cs, r600_context_bo_reloc(rctx, &rctx->rings.gfx, rbuffer, RADEON_USAGE_READ));
dirty_mask &= ~(1 << buffer_index);
{
evergreen_emit_constant_buffers(rctx, &rctx->constbuf_state[PIPE_SHADER_VERTEX], 176,
R_028180_ALU_CONST_BUFFER_SIZE_VS_0,
- R_028980_ALU_CONST_CACHE_VS_0);
+ R_028980_ALU_CONST_CACHE_VS_0,
+ 0 /* PKT3 flags */);
}
static void evergreen_emit_gs_constant_buffers(struct r600_context *rctx, struct r600_atom *atom)
{
evergreen_emit_constant_buffers(rctx, &rctx->constbuf_state[PIPE_SHADER_GEOMETRY], 336,
R_0281C0_ALU_CONST_BUFFER_SIZE_GS_0,
- R_0289C0_ALU_CONST_CACHE_GS_0);
+ R_0289C0_ALU_CONST_CACHE_GS_0,
+ 0 /* PKT3 flags */);
}
static void evergreen_emit_ps_constant_buffers(struct r600_context *rctx, struct r600_atom *atom)
{
evergreen_emit_constant_buffers(rctx, &rctx->constbuf_state[PIPE_SHADER_FRAGMENT], 0,
R_028140_ALU_CONST_BUFFER_SIZE_PS_0,
- R_028940_ALU_CONST_CACHE_PS_0);
+ R_028940_ALU_CONST_CACHE_PS_0,
+ 0 /* PKT3 flags */);
+}
+
+/* Emit the constant buffers bound for the compute stage. Forwards to
+ * evergreen_emit_constant_buffers with the PIPE_SHADER_COMPUTE state, a
+ * buffer id base of 816, the ALU_CONST_BUFFER_SIZE_LS_0 and
+ * ALU_CONST_CACHE_LS_0 registers, and RADEON_CP_PACKET3_COMPUTE_MODE as
+ * the packet flags. atom is not used. */
+static void evergreen_emit_cs_constant_buffers(struct r600_context *rctx, struct r600_atom *atom)
+{
+ evergreen_emit_constant_buffers(rctx, &rctx->constbuf_state[PIPE_SHADER_COMPUTE], 816,
+ R_028FC0_ALU_CONST_BUFFER_SIZE_LS_0,
+ R_028F40_ALU_CONST_CACHE_LS_0,
+ RADEON_CP_PACKET3_COMPUTE_MODE);
}
static void evergreen_emit_sampler_views(struct r600_context *rctx,
r600_store_context_reg(cb, R_028010_DB_RENDER_OVERRIDE2, 0);
r600_store_context_reg(cb, R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, 0);
r600_store_context_reg(cb, R_0286C8_SPI_THREAD_GROUPING, 0);
- r600_store_context_reg(cb, R_0286E8_SPI_COMPUTE_INPUT_CNTL, 0);
+ r600_store_context_reg_seq(cb, R_0286E4_SPI_PS_IN_CONTROL_2, 2);
+ r600_store_value(cb, 0); /* R_0286E4_SPI_PS_IN_CONTROL_2 */
+ r600_store_value(cb, 0); /* R_0286E8_SPI_COMPUTE_INPUT_CNTL */
r600_store_context_reg(cb, R_028B54_VGT_SHADER_STAGES_EN, 0);
- r600_store_context_reg(cb, R_028838_SQ_DYN_GPR_RESOURCE_LIMIT_1, 0);
eg_store_loop_const(cb, R_03A200_SQ_LOOP_CONST_0, 0x01000FFF);
eg_store_loop_const(cb, R_03A200_SQ_LOOP_CONST_0 + (32 * 4), 0x01000FFF);
r600_store_context_reg(cb, R_028010_DB_RENDER_OVERRIDE2, 0);
r600_store_context_reg(cb, R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, 0);
r600_store_context_reg(cb, R_0286C8_SPI_THREAD_GROUPING, 0);
- r600_store_context_reg(cb, R_0286E8_SPI_COMPUTE_INPUT_CNTL, 0);
+ r600_store_context_reg_seq(cb, R_0286E4_SPI_PS_IN_CONTROL_2, 2);
+ r600_store_value(cb, 0); /* R_0286E4_SPI_PS_IN_CONTROL_2 */
+ r600_store_value(cb, 0); /* R_0286E8_SPI_COMPUTE_INPUT_CNTL */
r600_store_context_reg(cb, R_0288EC_SQ_LDS_ALLOC_PS, 0);
r600_store_context_reg(cb, R_028B54_VGT_SHADER_STAGES_EN, 0);
void evergreen_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader *shader)
{
struct r600_context *rctx = (struct r600_context *)ctx;
- struct r600_pipe_state *rstate = &shader->rstate;
+ struct r600_command_buffer *cb = &shader->command_buffer;
struct r600_shader *rshader = &shader->shader;
unsigned i, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1, db_shader_control = 0;
int pos_index = -1, face_index = -1;
int ninterp = 0;
boolean have_linear = FALSE, have_centroid = FALSE, have_perspective = FALSE;
- unsigned spi_baryc_cntl, sid, tmp, idx = 0;
+ unsigned spi_baryc_cntl, sid, tmp, num = 0;
unsigned z_export = 0, stencil_export = 0;
unsigned sprite_coord_enable = rctx->rasterizer ? rctx->rasterizer->sprite_coord_enable : 0;
+ uint32_t spi_ps_input_cntl[32];
- rstate->nregs = 0;
+ if (!cb->buf) {
+ r600_init_command_buffer(cb, 64);
+ } else {
+ cb->num_dw = 0;
+ }
for (i = 0; i < rshader->ninput; i++) {
/* evergreen NUM_INTERP only contains values interpolated into the LDS,
sid = rshader->input[i].spi_sid;
if (sid) {
-
tmp = S_028644_SEMANTIC(sid);
if (rshader->input[i].name == TGSI_SEMANTIC_POSITION ||
tmp |= S_028644_PT_SPRITE_TEX(1);
}
- r600_pipe_state_add_reg(rstate, R_028644_SPI_PS_INPUT_CNTL_0 + idx * 4,
- tmp);
-
- idx++;
+ spi_ps_input_cntl[num++] = tmp;
}
}
+ r600_store_context_reg_seq(cb, R_028644_SPI_PS_INPUT_CNTL_0, num);
+ r600_store_array(cb, num, spi_ps_input_cntl);
+
for (i = 0; i < rshader->noutput; i++) {
if (rshader->output[i].name == TGSI_SEMANTIC_POSITION)
z_export = 1;
spi_ps_in_control_0 |= S_0286CC_POSITION_ENA(1) |
S_0286CC_POSITION_CENTROID(rshader->input[pos_index].centroid) |
S_0286CC_POSITION_ADDR(rshader->input[pos_index].gpr);
- spi_input_z |= 1;
+ spi_input_z |= S_0286D8_PROVIDE_Z_TO_SPI(1);
}
spi_ps_in_control_1 = 0;
spi_baryc_cntl |= S_0286E0_LINEAR_CENTER_ENA(1) |
S_0286E0_LINEAR_CENTROID_ENA(have_centroid);
- r600_pipe_state_add_reg(rstate, R_0286CC_SPI_PS_IN_CONTROL_0,
- spi_ps_in_control_0);
- r600_pipe_state_add_reg(rstate, R_0286D0_SPI_PS_IN_CONTROL_1,
- spi_ps_in_control_1);
- r600_pipe_state_add_reg(rstate, R_0286E4_SPI_PS_IN_CONTROL_2,
- 0);
- r600_pipe_state_add_reg(rstate, R_0286D8_SPI_INPUT_Z, spi_input_z);
- r600_pipe_state_add_reg(rstate,
- R_0286E0_SPI_BARYC_CNTL,
- spi_baryc_cntl);
-
- r600_pipe_state_add_reg_bo(rstate,
- R_028840_SQ_PGM_START_PS,
- r600_resource_va(ctx->screen, (void *)shader->bo) >> 8,
- shader->bo, RADEON_USAGE_READ);
- r600_pipe_state_add_reg(rstate,
- R_028844_SQ_PGM_RESOURCES_PS,
- S_028844_NUM_GPRS(rshader->bc.ngpr) |
- S_028844_PRIME_CACHE_ON_DRAW(1) |
- S_028844_STACK_SIZE(rshader->bc.nstack));
- r600_pipe_state_add_reg(rstate,
- R_02884C_SQ_PGM_EXPORTS_PS,
- exports_ps);
+ r600_store_context_reg_seq(cb, R_0286CC_SPI_PS_IN_CONTROL_0, 2);
+ r600_store_value(cb, spi_ps_in_control_0); /* R_0286CC_SPI_PS_IN_CONTROL_0 */
+ r600_store_value(cb, spi_ps_in_control_1); /* R_0286D0_SPI_PS_IN_CONTROL_1 */
+
+ r600_store_context_reg(cb, R_0286E0_SPI_BARYC_CNTL, spi_baryc_cntl);
+ r600_store_context_reg(cb, R_0286D8_SPI_INPUT_Z, spi_input_z);
+ r600_store_context_reg(cb, R_02884C_SQ_PGM_EXPORTS_PS, exports_ps);
+
+ r600_store_context_reg_seq(cb, R_028840_SQ_PGM_START_PS, 2);
+ r600_store_value(cb, r600_resource_va(ctx->screen, (void *)shader->bo) >> 8);
+ r600_store_value(cb, /* R_028844_SQ_PGM_RESOURCES_PS */
+ S_028844_NUM_GPRS(rshader->bc.ngpr) |
+ S_028844_PRIME_CACHE_ON_DRAW(1) |
+ S_028844_STACK_SIZE(rshader->bc.nstack));
+ /* After that, the NOP relocation packet must be emitted (shader->bo, RADEON_USAGE_READ). */
shader->db_shader_control = db_shader_control;
shader->ps_depth_export = z_export | stencil_export;
struct r600_texture *rdst = (struct r600_texture*)dst;
unsigned array_mode, lbpp, pitch_tile_max, slice_tile_max, size;
unsigned ncopy, height, cheight, detile, i, x, y, z, src_mode, dst_mode;
- unsigned sub_cmd, bank_h, bank_w, mt_aspect, nbanks, tile_split;
+ unsigned sub_cmd, bank_h, bank_w, mt_aspect, nbanks, tile_split, non_disp_tiling = 0;
uint64_t base, addr;
/* make sure that the dma ring is only one active */
dst_mode = dst_mode == RADEON_SURF_MODE_LINEAR_ALIGNED ? RADEON_SURF_MODE_LINEAR : dst_mode;
assert(dst_mode != src_mode);
+ /* non_disp_tiling bit needs to be set for depth, stencil, and fmask surfaces */
+ if (util_format_has_depth(util_format_description(src->format)))
+ non_disp_tiling = 1;
+
y = 0;
sub_cmd = 0x8;
lbpp = util_logbase2(bpp);
cs->buf[cs->cdw++] = (pitch_tile_max << 0) | ((height - 1) << 16);
cs->buf[cs->cdw++] = (slice_tile_max << 0);
cs->buf[cs->cdw++] = (x << 0) | (z << 18);
- cs->buf[cs->cdw++] = (y << 0) | (tile_split << 21) | (nbanks << 25);
+ cs->buf[cs->cdw++] = (y << 0) | (tile_split << 21) | (nbanks << 25) | (non_disp_tiling << 28);
cs->buf[cs->cdw++] = addr & 0xfffffffc;
cs->buf[cs->cdw++] = (addr >> 32UL) & 0xff;
copy_height -= cheight;
return FALSE;
}
+ /* 128 bpp surfaces require non_disp_tiling for both
+ * tiled and linear buffers on cayman. However, async
+ * DMA only supports it on the tiled side. As such
+ * the tile order is backwards after a L2T/T2L packet.
+ */
+ if ((rctx->chip_class == CAYMAN) &&
+ (src_mode != dst_mode) &&
+ (util_format_get_blocksize(src->format) >= 16)) {
+ return FALSE;
+ }
+
if (src_mode == dst_mode) {
uint64_t dst_offset, src_offset;
/* simple dma blit would do NOTE code here assume :
r600_init_atom(rctx, &rctx->constbuf_state[PIPE_SHADER_VERTEX].atom, id++, evergreen_emit_vs_constant_buffers, 0);
r600_init_atom(rctx, &rctx->constbuf_state[PIPE_SHADER_GEOMETRY].atom, id++, evergreen_emit_gs_constant_buffers, 0);
r600_init_atom(rctx, &rctx->constbuf_state[PIPE_SHADER_FRAGMENT].atom, id++, evergreen_emit_ps_constant_buffers, 0);
+ r600_init_atom(rctx, &rctx->constbuf_state[PIPE_SHADER_COMPUTE].atom, id++, evergreen_emit_cs_constant_buffers, 0);
/* shader program */
r600_init_atom(rctx, &rctx->cs_shader_state.atom, id++, evergreen_emit_cs_shader, 0);
/* sampler */
r600_init_atom(rctx, &rctx->vertex_fetch_shader.atom, id++, evergreen_emit_vertex_fetch_shader, 5);
r600_init_atom(rctx, &rctx->streamout.begin_atom, id++, r600_emit_streamout_begin, 0);
r600_init_atom(rctx, &rctx->vertex_shader.atom, id++, r600_emit_shader, 23);
+ r600_init_atom(rctx, &rctx->pixel_shader.atom, id++, r600_emit_shader, 0);
rctx->context.create_blend_state = evergreen_create_blend_state;
rctx->context.create_depth_stencil_alpha_state = evergreen_create_dsa_state;
rctx->context.set_framebuffer_state = evergreen_set_framebuffer_state;
rctx->context.set_polygon_stipple = evergreen_set_polygon_stipple;
rctx->context.set_scissor_state = evergreen_set_scissor_state;
+
+ if (rctx->chip_class == EVERGREEN)
+ rctx->context.get_sample_position = evergreen_get_sample_position;
+ else
+ rctx->context.get_sample_position = cayman_get_sample_position;
evergreen_init_compute_state_functions(rctx);
}