return ss;
}
+/*
+ * Fill in the resource words of a sampler view that wraps a PIPE_BUFFER
+ * (Evergreen register layout, S_030008_* / S_03001C_*).
+ *
+ * The buffer is described as a vertex-fetch resource of type
+ * V_03001C_SQ_TEX_VTX_VALID_BUFFER rather than as a texture.
+ *
+ * view    - sampler view; base.texture, base.format, base.context and
+ *           base.swizzle_{r,g,b,a} are read from it
+ * width0  - stored in resource word 1 as (width0 - 1)
+ * height0 - not used by this function
+ *
+ * Returns &view->base.
+ */
+static struct pipe_sampler_view *
+texture_buffer_sampler_view(struct r600_pipe_sampler_view *view,
+			    unsigned width0, unsigned height0)
+{
+	struct r600_texture *rtex = (struct r600_texture *)view->base.texture;
+	const struct util_format_description *fmt_desc =
+		util_format_description(view->base.format);
+	int elem_stride = util_format_get_blocksize(view->base.format);
+	unsigned data_format, num_format, format_comp, endian_swap;
+	unsigned char view_swizzle[4];
+	uint64_t gpu_addr;
+
+	/* Translate the pipe format into the vertex-fetch format fields. */
+	r600_vertex_data_type(view->base.format,
+			      &data_format, &num_format, &format_comp,
+			      &endian_swap);
+
+	view_swizzle[0] = view->base.swizzle_r;
+	view_swizzle[1] = view->base.swizzle_g;
+	view_swizzle[2] = view->base.swizzle_b;
+	view_swizzle[3] = view->base.swizzle_a;
+
+	gpu_addr = r600_resource_va(view->base.context->screen, view->base.texture);
+
+	view->tex_resource = &rtex->resource;
+	view->skip_mip_address_reloc = true;
+
+	/* Word 0 takes the low 32 bits of the address, word 2 the high part. */
+	view->tex_resource_words[0] = gpu_addr;
+	view->tex_resource_words[1] = width0 - 1;
+	view->tex_resource_words[2] = S_030008_BASE_ADDRESS_HI(gpu_addr >> 32UL) |
+		S_030008_STRIDE(elem_stride) |
+		S_030008_DATA_FORMAT(data_format) |
+		S_030008_NUM_FORMAT_ALL(num_format) |
+		S_030008_FORMAT_COMP_ALL(format_comp) |
+		S_030008_SRF_MODE_ALL(1) |
+		S_030008_ENDIAN_SWAP(endian_swap);
+	/* Format swizzle composed with the view swizzle, in vertex-fetch layout. */
+	view->tex_resource_words[3] =
+		r600_get_swizzle_combined(fmt_desc->swizzle, view_swizzle, TRUE);
+	/*
+	 * In theory dword 4 holds the number of elements for use with resinfo,
+	 * but that does not appear to work; the AMD GPU shader analyser uses a
+	 * constant buffer to store the element counts for buffer TXQ instead.
+	 */
+	view->tex_resource_words[4] = 0;
+	view->tex_resource_words[5] = 0;
+	view->tex_resource_words[6] = 0;
+	view->tex_resource_words[7] = S_03001C_TYPE(V_03001C_SQ_TEX_VTX_VALID_BUFFER);
+
+	return &view->base;
+}
+
struct pipe_sampler_view *
evergreen_create_sampler_view_custom(struct pipe_context *ctx,
struct pipe_resource *texture,
view->base.reference.count = 1;
view->base.context = ctx;
+ if (texture->target == PIPE_BUFFER)
+ return texture_buffer_sampler_view(view, width0, height0);
+
swizzle[0] = state->swizzle_r;
swizzle[1] = state->swizzle_g;
swizzle[2] = state->swizzle_b;
fprintf(stderr, "--------------------------------------\n");
}
-static void r600_vertex_data_type(enum pipe_format pformat,
+void r600_vertex_data_type(enum pipe_format pformat,
unsigned *format,
unsigned *num_format, unsigned *format_comp, unsigned *endian)
{
void r600_bytecode_export_read(struct r600_bytecode_output *output, uint32_t word0, uint32_t word1);
void eg_bytecode_export_read(struct r600_bytecode_output *output, uint32_t word0, uint32_t word1);
+void r600_vertex_data_type(enum pipe_format pformat, unsigned *format,
+ unsigned *num_format, unsigned *format_comp, unsigned *endian);
#endif
case PIPE_CAP_COMPUTE:
case PIPE_CAP_START_INSTANCE:
case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
+ case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
return 1;
case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
return 256;
case PIPE_CAP_GLSL_FEATURE_LEVEL:
- return 130;
+ return 140;
case PIPE_CAP_TEXTURE_MULTISAMPLE:
return rscreen->msaa_texture_support != MSAA_TEXTURE_SAMPLE_ZERO;
case PIPE_CAP_FRAGMENT_COLOR_CLAMPED:
case PIPE_CAP_VERTEX_COLOR_CLAMPED:
case PIPE_CAP_USER_VERTEX_BUFFERS:
- case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
return 0;
/* Stream output. */
#define R600_TRACE_CS 0
#define R600_MAX_USER_CONST_BUFFERS 13
-#define R600_MAX_DRIVER_CONST_BUFFERS 2
+#define R600_MAX_DRIVER_CONST_BUFFERS 3
#define R600_MAX_CONST_BUFFERS (R600_MAX_USER_CONST_BUFFERS + R600_MAX_DRIVER_CONST_BUFFERS)
/* start driver buffers after user buffers */
#define R600_UCP_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS)
#define R600_TXQ_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 1)
+#define R600_BUFFER_INFO_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 2)
#define R600_MAX_CONST_BUFFER_SIZE 4096
uint32_t compressed_depthtex_mask; /* which textures are depth */
uint32_t compressed_colortex_mask;
boolean dirty_txq_constants;
+ boolean dirty_buffer_constants;
};
struct r600_sampler_states {
/* cube array txq workaround */
uint32_t *txq_constants;
+ /* buffer related workarounds */
+ uint32_t *buffer_constants;
};
struct r600_fence {
const struct pipe_surface *templ,
unsigned width, unsigned height);
+unsigned r600_get_swizzle_combined(const unsigned char *swizzle_format,
+ const unsigned char *swizzle_view,
+ boolean vtx);
+
/* r600_state_common.c */
void r600_init_common_state_functions(struct r600_context *rctx);
void r600_emit_cso_state(struct r600_context *rctx, struct r600_atom *atom);
return ctx->file_offset[inst->Src[index].Register.File] + inst->Src[index].Register.Index;
}
+/*
+ * Emit a vertex-fetch (VTX) instruction that implements a texel fetch from
+ * a buffer texture.
+ *
+ * ctx                  - shader translation context; the current TGSI
+ *                        instruction is taken from ctx->parse
+ * src_requires_loading - when set, source operand 0 is first copied into
+ *                        ctx->temp_reg with four MOVs and fetched from there
+ *
+ * On Evergreen and later the function returns right after the fetch.  On
+ * older chips two fix-ups follow, using the per-view constants stored in
+ * R600_BUFFER_INFO_CONST_BUFFER:
+ *   - every written channel is ANDed with its mask from vec4 (id * 2);
+ *   - the W channel is ORed with the default alpha from vec4 (id * 2 + 1),
+ *     channel 0.
+ *
+ * Returns 0 on success, or the non-zero error code of the bytecode helper
+ * that failed.
+ */
+static int do_vtx_fetch_inst(struct r600_shader_ctx *ctx, boolean src_requires_loading)
+{
+	struct r600_bytecode_vtx vtx;
+	struct r600_bytecode_alu alu;
+	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+	const unsigned writemask = inst->Dst[0].Register.WriteMask;
+	int src_gpr, r, i, lasti;
+	int id = tgsi_tex_get_src_gpr(ctx, 1);
+
+	src_gpr = tgsi_tex_get_src_gpr(ctx, 0);
+	if (src_requires_loading) {
+		/* Copy all four source channels into the temporary register. */
+		for (i = 0; i < 4; i++) {
+			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
+			r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
+			alu.dst.sel = ctx->temp_reg;
+			alu.dst.chan = i;
+			if (i == 3)
+				alu.last = 1;
+			alu.dst.write = 1;
+			r = r600_bytecode_add_alu(ctx->bc, &alu);
+			if (r)
+				return r;
+		}
+		src_gpr = ctx->temp_reg;
+	}
+
+	memset(&vtx, 0, sizeof(vtx));
+	vtx.inst = 0;
+	/* Buffer-texture fetch resources are numbered after the constant buffers. */
+	vtx.buffer_id = id + R600_MAX_CONST_BUFFERS;
+	vtx.fetch_type = 2;		/* VTX_FETCH_NO_INDEX_OFFSET */
+	vtx.src_gpr = src_gpr;
+	vtx.mega_fetch_count = 16;
+	vtx.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
+	/* Selector 7 is used for every channel that is not in the write mask. */
+	vtx.dst_sel_x = (writemask & 1) ? 0 : 7;		/* SEL_X */
+	vtx.dst_sel_y = (writemask & 2) ? 1 : 7;		/* SEL_Y */
+	vtx.dst_sel_z = (writemask & 4) ? 2 : 7;		/* SEL_Z */
+	vtx.dst_sel_w = (writemask & 8) ? 3 : 7;		/* SEL_W */
+	vtx.use_const_fields = 1;
+	vtx.srf_mode_all = 1;		/* SRF_MODE_NO_ZERO */
+
+	if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx)))
+		return r;
+
+	if (ctx->bc->chip_class >= EVERGREEN)
+		return 0;
+
+	/* The last written channel closes the ALU group; it is loop-invariant. */
+	lasti = tgsi_last_instruction(writemask);
+
+	for (i = 0; i < 4; i++) {
+		if (!(writemask & (1 << i)))
+			continue;
+
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT);
+
+		alu.dst.chan = i;
+		alu.dst.sel = vtx.dst_gpr;
+		alu.dst.write = 1;
+
+		alu.src[0].sel = vtx.dst_gpr;
+		alu.src[0].chan = i;
+
+		/* Per-channel mask: vec4 (id * 2) of the buffer-info constants. */
+		alu.src[1].sel = 512 + (id * 2);
+		alu.src[1].chan = i;
+		alu.src[1].kc_bank = R600_BUFFER_INFO_CONST_BUFFER;
+
+		if (i == lasti)
+			alu.last = 1;
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
+		if (r)
+			return r;
+	}
+
+	/*
+	 * The OR below writes destination channel 3 (W), so it must be gated on
+	 * the W bit of the write mask.  Testing the X/Y bits instead would skip
+	 * the default alpha for a ".w"-only fetch and would overwrite an
+	 * unmasked W channel for ".x"/".y" fetches.
+	 */
+	if (writemask & (1 << 3)) {
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_OR_INT);
+
+		alu.dst.chan = 3;
+		alu.dst.sel = vtx.dst_gpr;
+		alu.dst.write = 1;
+
+		alu.src[0].sel = vtx.dst_gpr;
+		alu.src[0].chan = 3;
+
+		/* Default alpha: channel 0 of vec4 (id * 2 + 1). */
+		alu.src[1].sel = 512 + (id * 2) + 1;
+		alu.src[1].chan = 0;
+		alu.src[1].kc_bank = R600_BUFFER_INFO_CONST_BUFFER;
+
+		alu.last = 1;
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
+		if (r)
+			return r;
+	}
+	return 0;
+}
+
+/*
+ * Implement TXQ on a buffer texture with a single MOV that copies the
+ * element count out of R600_BUFFER_INFO_CONST_BUFFER into the destination
+ * of the current TGSI instruction.
+ *
+ * Returns the result of r600_bytecode_add_alu() (0 on success).
+ */
+static int r600_do_buffer_txq(struct r600_shader_ctx *ctx)
+{
+	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+	struct r600_bytecode_alu alu;
+	int id = tgsi_tex_get_src_gpr(ctx, 1);
+
+	memset(&alu, 0, sizeof(alu));
+	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
+
+	if (ctx->bc->chip_class < EVERGREEN) {
+		/*
+		 * R600/R700: each view owns two vec4s; the element count sits
+		 * in the second vec4 at channel index 1.
+		 */
+		alu.src[0].sel = 512 + (id * 2) + 1;
+		alu.src[0].chan = 1;
+	} else {
+		/* Evergreen: one dword per view, packed four to a vec4. */
+		alu.src[0].sel = 512 + (id / 4);
+		alu.src[0].chan = id % 4;
+	}
+	alu.src[0].kc_bank = R600_BUFFER_INFO_CONST_BUFFER;
+
+	tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
+	alu.last = 1;
+
+	return r600_bytecode_add_alu(ctx->bc, &alu);
+}
+
static int tgsi_tex(struct r600_shader_ctx *ctx)
{
static float one_point_five = 1.5f;
src_gpr = tgsi_tex_get_src_gpr(ctx, 0);
+ if (inst->Texture.Texture == TGSI_TEXTURE_BUFFER) {
+ if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ) {
+ ctx->shader->uses_tex_buffers = true;
+ return r600_do_buffer_txq(ctx);
+ }
+ else if (inst->Instruction.Opcode == TGSI_OPCODE_TXF) {
+ if (ctx->bc->chip_class < EVERGREEN)
+ ctx->shader->uses_tex_buffers = true;
+ return do_vtx_fetch_inst(ctx, src_requires_loading);
+ }
+ }
+
if (inst->Instruction.Opcode == TGSI_OPCODE_TXF) {
/* get offset values */
if (inst->Texture.NumOffsets) {
boolean vs_out_misc_write;
boolean vs_out_point_size;
boolean has_txq_cube_array_z_comp;
+ boolean uses_tex_buffers;
};
struct r600_shader_key {
return ss;
}
+/*
+ * Fill in the resource words of a sampler view that wraps a PIPE_BUFFER
+ * (R600/R700 register layout, S_038008_* / S_038018_*).
+ *
+ * The buffer is described as a vertex-fetch resource of type
+ * V_038010_SQ_TEX_VTX_VALID_BUFFER.  No swizzle is programmed here;
+ * resource word 3 is left at zero.
+ *
+ * view    - sampler view; base.texture, base.format and base.context are
+ *           read from it
+ * width0  - stored in resource word 1 as (width0 - 1)
+ * height0 - not used by this function
+ *
+ * Returns &view->base.
+ */
+static struct pipe_sampler_view *
+texture_buffer_sampler_view(struct r600_pipe_sampler_view *view,
+			    unsigned width0, unsigned height0)
+{
+	struct r600_texture *rtex = (struct r600_texture *)view->base.texture;
+	int elem_stride = util_format_get_blocksize(view->base.format);
+	unsigned data_format, num_format, format_comp, endian_swap;
+	uint64_t gpu_addr;
+
+	/* Translate the pipe format into the vertex-fetch format fields. */
+	r600_vertex_data_type(view->base.format,
+			      &data_format, &num_format, &format_comp,
+			      &endian_swap);
+
+	gpu_addr = r600_resource_va(view->base.context->screen, view->base.texture);
+
+	view->tex_resource = &rtex->resource;
+	view->skip_mip_address_reloc = true;
+
+	/* Word 0 takes the low 32 bits of the address, word 2 the high part. */
+	view->tex_resource_words[0] = gpu_addr;
+	view->tex_resource_words[1] = width0 - 1;
+	view->tex_resource_words[2] = S_038008_BASE_ADDRESS_HI(gpu_addr >> 32UL) |
+		S_038008_STRIDE(elem_stride) |
+		S_038008_DATA_FORMAT(data_format) |
+		S_038008_NUM_FORMAT_ALL(num_format) |
+		S_038008_FORMAT_COMP_ALL(format_comp) |
+		S_038008_SRF_MODE_ALL(1) |
+		S_038008_ENDIAN_SWAP(endian_swap);
+	view->tex_resource_words[3] = 0;
+	/*
+	 * In theory dword 4 holds the number of elements for use with resinfo,
+	 * but that does not appear to work; the AMD GPU shader analyser uses a
+	 * constant buffer to store the element counts for buffer TXQ instead.
+	 */
+	view->tex_resource_words[4] = 0;
+	view->tex_resource_words[5] = 0;
+	view->tex_resource_words[6] = S_038018_TYPE(V_038010_SQ_TEX_VTX_VALID_BUFFER);
+
+	return &view->base;
+}
+
struct pipe_sampler_view *
r600_create_sampler_view_custom(struct pipe_context *ctx,
struct pipe_resource *texture,
view->base.reference.count = 1;
view->base.context = ctx;
+ if (texture->target == PIPE_BUFFER)
+ return texture_buffer_sampler_view(view, texture->width0, 1);
+
swizzle[0] = state->swizzle_r;
swizzle[1] = state->swizzle_g;
swizzle[2] = state->swizzle_b;
struct r600_texture *rtex =
(struct r600_texture*)rviews[i]->base.texture;
- if (rtex->is_depth && !rtex->is_flushing_texture) {
- dst->views.compressed_depthtex_mask |= 1 << i;
- } else {
- dst->views.compressed_depthtex_mask &= ~(1 << i);
- }
+ if (rviews[i]->base.texture->target != PIPE_BUFFER) {
+ if (rtex->is_depth && !rtex->is_flushing_texture) {
+ dst->views.compressed_depthtex_mask |= 1 << i;
+ } else {
+ dst->views.compressed_depthtex_mask &= ~(1 << i);
+ }
- /* Track compressed colorbuffers. */
- if (rtex->cmask_size && rtex->fmask_size) {
- dst->views.compressed_colortex_mask |= 1 << i;
- } else {
- dst->views.compressed_colortex_mask &= ~(1 << i);
+ /* Track compressed colorbuffers. */
+ if (rtex->cmask_size && rtex->fmask_size) {
+ dst->views.compressed_colortex_mask |= 1 << i;
+ } else {
+ dst->views.compressed_colortex_mask &= ~(1 << i);
+ }
}
-
/* Changing from array to non-arrays textures and vice versa requires
* updating TEX_ARRAY_OVERRIDE in sampler states on R6xx-R7xx. */
if (rctx->chip_class <= R700 &&
dst->views.compressed_depthtex_mask &= dst->views.enabled_mask;
dst->views.compressed_colortex_mask &= dst->views.enabled_mask;
dst->views.dirty_txq_constants = TRUE;
+ dst->views.dirty_buffer_constants = TRUE;
r600_sampler_views_dirty(rctx, &dst->views);
if (dirty_sampler_states_mask) {
rctx->sample_mask.atom.dirty = true;
}
+/*
+ * R600/R700 hardware has no swizzle on vertex fetches.  Texture buffer
+ * objects do not need full swizzles, but they do need unused channels
+ * masked off and a default alpha, so each sampler view slot gets a group
+ * of 8 dwords in a driver constant buffer:
+ *
+ *   dwords 0-3: per-channel AND mask (0xffffffff if the format has that
+ *               channel, 0 otherwise)
+ *   dword  4:   value ORed into alpha: integer 1 or 1.0f (0x3f800000) when
+ *               the format has fewer than four channels, otherwise 0
+ *   dword  5:   element count for TXQ (width0 / format block size)
+ *   dwords 6-7: left at zero
+ *
+ * The buffer is rebuilt only while views.dirty_buffer_constants is set and
+ * is bound to slot R600_BUFFER_INFO_CONST_BUFFER of the given shader stage.
+ * If the allocation fails the previous constants are left untouched and the
+ * dirty flag stays set, so the next call tries again.
+ */
+static void r600_setup_buffer_constants(struct r600_context *rctx, int shader_type)
+{
+	struct r600_textures_info *samplers = &rctx->samplers[shader_type];
+	int bits;
+	uint32_t array_size;
+	uint32_t *constants;
+	struct pipe_constant_buffer cb;
+	int i, j;
+
+	if (!samplers->views.dirty_buffer_constants)
+		return;
+
+	bits = util_last_bit(samplers->views.enabled_mask);
+	/* NOTE(review): this is 4x the 8 dwords per slot written below - confirm intent. */
+	array_size = bits * 8 * sizeof(uint32_t) * 4;
+
+	/*
+	 * Never assign realloc()'s result straight back: on failure that would
+	 * lose the old block and the memset below would dereference NULL.
+	 */
+	constants = realloc(samplers->buffer_constants, array_size);
+	if (!constants && array_size)
+		return;
+
+	samplers->buffer_constants = constants;
+	samplers->views.dirty_buffer_constants = FALSE;
+
+	if (array_size)
+		memset(constants, 0, array_size);
+
+	for (i = 0; i < bits; i++) {
+		const struct util_format_description *desc;
+		int offset = i * 8;
+
+		if (!(samplers->views.enabled_mask & (1u << i)))
+			continue;
+
+		desc = util_format_description(samplers->views.views[i]->base.format);
+
+		/* AND masks: keep only the channels the format really has. */
+		for (j = 0; j < 4; j++) {
+			if (j < desc->nr_channels)
+				constants[offset + j] = 0xffffffff;
+			else
+				constants[offset + j] = 0x0;
+		}
+
+		/* Default alpha for formats without a fourth channel. */
+		if (desc->nr_channels < 4) {
+			if (desc->channel[0].pure_integer)
+				constants[offset + 4] = 1;
+			else
+				constants[offset + 4] = 0x3f800000; /* 1.0f */
+		} else {
+			constants[offset + 4] = 0;
+		}
+
+		/* Element count read back by buffer TXQ. */
+		constants[offset + 5] = samplers->views.views[i]->base.texture->width0 /
+			util_format_get_blocksize(samplers->views.views[i]->base.format);
+	}
+
+	cb.buffer = NULL;
+	cb.user_buffer = samplers->buffer_constants;
+	cb.buffer_offset = 0;
+	cb.buffer_size = array_size;
+	rctx->context.set_constant_buffer(&rctx->context, shader_type, R600_BUFFER_INFO_CONST_BUFFER, &cb);
+	pipe_resource_reference(&cb.buffer, NULL);
+}
+
+/*
+ * On Evergreen only the buffer size for TXQ has to be stored: one dword
+ * per sampler view slot holding the element count (width0 / format block
+ * size), zero for disabled slots.
+ *
+ * The buffer is rebuilt only while views.dirty_buffer_constants is set and
+ * is bound to slot R600_BUFFER_INFO_CONST_BUFFER of the given shader stage.
+ * If the allocation fails the previous constants are left untouched and the
+ * dirty flag stays set, so the next call tries again.
+ */
+static void eg_setup_buffer_constants(struct r600_context *rctx, int shader_type)
+{
+	struct r600_textures_info *samplers = &rctx->samplers[shader_type];
+	int bits;
+	uint32_t array_size;
+	uint32_t *constants;
+	struct pipe_constant_buffer cb;
+	int i;
+
+	if (!samplers->views.dirty_buffer_constants)
+		return;
+
+	bits = util_last_bit(samplers->views.enabled_mask);
+	/* NOTE(review): sized as one vec4 per slot although one dword is written - confirm intent. */
+	array_size = bits * sizeof(uint32_t) * 4;
+
+	/*
+	 * Never assign realloc()'s result straight back: on failure that would
+	 * lose the old block and the memset below would dereference NULL.
+	 */
+	constants = realloc(samplers->buffer_constants, array_size);
+	if (!constants && array_size)
+		return;
+
+	samplers->buffer_constants = constants;
+	samplers->views.dirty_buffer_constants = FALSE;
+
+	if (array_size)
+		memset(constants, 0, array_size);
+
+	for (i = 0; i < bits; i++) {
+		if (!(samplers->views.enabled_mask & (1u << i)))
+			continue;
+
+		/* Element count read back by buffer TXQ. */
+		constants[i] = samplers->views.views[i]->base.texture->width0 /
+			util_format_get_blocksize(samplers->views.views[i]->base.format);
+	}
+
+	cb.buffer = NULL;
+	cb.user_buffer = samplers->buffer_constants;
+	cb.buffer_offset = 0;
+	cb.buffer_size = array_size;
+	rctx->context.set_constant_buffer(&rctx->context, shader_type, R600_BUFFER_INFO_CONST_BUFFER, &cb);
+	pipe_resource_reference(&cb.buffer, NULL);
+}
+
static void r600_setup_txq_cube_array_constants(struct r600_context *rctx, int shader_type)
{
struct r600_textures_info *samplers = &rctx->samplers[shader_type];
if (ps_dirty)
r600_context_pipe_state_set(rctx, &rctx->ps_shader->current->rstate);
+ /* on R600 we stuff masks + txq info into one constant buffer */
+ /* on evergreen we only need a txq info one */
+ if (rctx->chip_class < EVERGREEN) {
+ if (rctx->ps_shader && rctx->ps_shader->current->shader.uses_tex_buffers)
+ r600_setup_buffer_constants(rctx, PIPE_SHADER_FRAGMENT);
+ if (rctx->vs_shader && rctx->vs_shader->current->shader.uses_tex_buffers)
+ r600_setup_buffer_constants(rctx, PIPE_SHADER_VERTEX);
+ } else {
+ if (rctx->ps_shader && rctx->ps_shader->current->shader.uses_tex_buffers)
+ eg_setup_buffer_constants(rctx, PIPE_SHADER_FRAGMENT);
+ if (rctx->vs_shader && rctx->vs_shader->current->shader.uses_tex_buffers)
+ eg_setup_buffer_constants(rctx, PIPE_SHADER_VERTEX);
+ }
+
+
if (rctx->ps_shader && rctx->ps_shader->current->shader.has_txq_cube_array_z_comp)
r600_setup_txq_cube_array_constants(rctx, PIPE_SHADER_FRAGMENT);
if (rctx->vs_shader && rctx->vs_shader->current->shader.has_txq_cube_array_z_comp)
r600->context.surface_destroy = r600_surface_destroy;
}
-static unsigned r600_get_swizzle_combined(const unsigned char *swizzle_format,
- const unsigned char *swizzle_view)
+unsigned r600_get_swizzle_combined(const unsigned char *swizzle_format,
+ const unsigned char *swizzle_view,
+ boolean vtx)
{
unsigned i;
unsigned char swizzle[4];
unsigned result = 0;
- const uint32_t swizzle_shift[4] = {
+ const uint32_t tex_swizzle_shift[4] = {
16, 19, 22, 25,
};
+ const uint32_t vtx_swizzle_shift[4] = {
+ 3, 6, 9, 12,
+ };
const uint32_t swizzle_bit[4] = {
0, 1, 2, 3,
};
+ const uint32_t *swizzle_shift = tex_swizzle_shift;
+
+ if (vtx)
+ swizzle_shift = vtx_swizzle_shift;
if (swizzle_view) {
util_format_compose_swizzles(swizzle_format, swizzle_view, swizzle);
};
desc = util_format_description(format);
- word4 |= r600_get_swizzle_combined(desc->swizzle, swizzle_view);
+ word4 |= r600_get_swizzle_combined(desc->swizzle, swizzle_view, FALSE);
/* Colorspace (return non-RGB formats directly). */
switch (desc->colorspace) {