#include "fd6_const.h"
#include "fd6_pack.h"
+#define emit_const_user fd6_emit_const_user
+#define emit_const_bo fd6_emit_const_bo
#include "ir3_const.h"
-/* regid: base const register
-* prsc or dwords: buffer containing constant values
-* sizedwords: size of const value buffer
-*/
+/* regid: base const register
+ * dwords: user buffer containing constant values
+ * sizedwords: size of const value buffer, in dwords
+ */
-static void
-fd6_emit_const(struct fd_ringbuffer *ring, gl_shader_stage type,
- uint32_t regid, uint32_t offset, uint32_t sizedwords,
- const uint32_t *dwords, struct pipe_resource *prsc)
+/* Emit constants from a user (CPU) buffer, inlined into the cmdstream
+ * via a SS6_DIRECT CP_LOAD_STATE6 packet.
+ */
+void
+fd6_emit_const_user(struct fd_ringbuffer *ring,
+ const struct ir3_shader_variant *v, uint32_t regid,
+ uint32_t sizedwords, const uint32_t *dwords)
{
- if (prsc) {
- struct fd_bo *bo = fd_resource(prsc)->bo;
-
- if (fd6_geom_stage(type)) {
- OUT_PKT(ring, CP_LOAD_STATE6_GEOM,
- CP_LOAD_STATE6_0(
- .dst_off = regid/4,
- .state_type = ST6_CONSTANTS,
- .state_src = SS6_INDIRECT,
- .state_block = fd6_stage2shadersb(type),
- .num_unit = DIV_ROUND_UP(sizedwords, 4)
- ),
- CP_LOAD_STATE6_EXT_SRC_ADDR(
- .bo = bo,
- .bo_offset = offset
- )
- );
- } else {
- OUT_PKT(ring, CP_LOAD_STATE6_FRAG,
- CP_LOAD_STATE6_0(
- .dst_off = regid/4,
- .state_type = ST6_CONSTANTS,
- .state_src = SS6_INDIRECT,
- .state_block = fd6_stage2shadersb(type),
- .num_unit = DIV_ROUND_UP(sizedwords, 4)
- ),
- CP_LOAD_STATE6_EXT_SRC_ADDR(
- .bo = bo,
- .bo_offset = offset
- )
- );
- }
+ emit_const_asserts(ring, v, regid, sizedwords);
+
+ /* NOTE we cheat a bit here, since we know mesa is aligning
+ * the size of the user buffer to 16 bytes. And we want to
+ * cut cycles in a hot path.
+ */
+ uint32_t align_sz = align(sizedwords, 4);
+
+ /* geometry-pipeline stages use the _GEOM packet, other stages _FRAG */
+ if (fd6_geom_stage(v->type)) {
+ OUT_PKTBUF(ring, CP_LOAD_STATE6_GEOM, dwords, align_sz,
+ CP_LOAD_STATE6_0(
+ .dst_off = regid/4,
+ .state_type = ST6_CONSTANTS,
+ .state_src = SS6_DIRECT,
+ .state_block = fd6_stage2shadersb(v->type),
+ .num_unit = DIV_ROUND_UP(sizedwords, 4)
+ ),
+ CP_LOAD_STATE6_1(),
+ CP_LOAD_STATE6_2()
+ );
} else {
- /* NOTE we cheat a bit here, since we know mesa is aligning
- * the size of the user buffer to 16 bytes. And we want to
- * cut cycles in a hot path.
- */
- uint32_t align_sz = align(sizedwords, 4);
- dwords = (uint32_t *)&((uint8_t *)dwords)[offset];
-
- if (fd6_geom_stage(type)) {
- OUT_PKTBUF(ring, CP_LOAD_STATE6_GEOM, dwords, align_sz,
- CP_LOAD_STATE6_0(
- .dst_off = regid/4,
- .state_type = ST6_CONSTANTS,
- .state_src = SS6_DIRECT,
- .state_block = fd6_stage2shadersb(type),
- .num_unit = DIV_ROUND_UP(sizedwords, 4)
- ),
- CP_LOAD_STATE6_1(),
- CP_LOAD_STATE6_2()
- );
- } else {
- OUT_PKTBUF(ring, CP_LOAD_STATE6_FRAG, dwords, align_sz,
- CP_LOAD_STATE6_0(
- .dst_off = regid/4,
- .state_type = ST6_CONSTANTS,
- .state_src = SS6_DIRECT,
- .state_block = fd6_stage2shadersb(type),
- .num_unit = DIV_ROUND_UP(sizedwords, 4)
- ),
- CP_LOAD_STATE6_1(),
- CP_LOAD_STATE6_2()
- );
- }
+ OUT_PKTBUF(ring, CP_LOAD_STATE6_FRAG, dwords, align_sz,
+ CP_LOAD_STATE6_0(
+ .dst_off = regid/4,
+ .state_type = ST6_CONSTANTS,
+ .state_src = SS6_DIRECT,
+ .state_block = fd6_stage2shadersb(v->type),
+ .num_unit = DIV_ROUND_UP(sizedwords, 4)
+ ),
+ CP_LOAD_STATE6_1(),
+ CP_LOAD_STATE6_2()
+ );
}
}
-
-static void
-fd6_emit_const_bo(struct fd_ringbuffer *ring, gl_shader_stage type, boolean write,
- uint32_t regid, uint32_t num, struct pipe_resource **prscs, uint32_t *offsets)
+/* Emit constants sourced from a GPU buffer object: the CP_LOAD_STATE6
+ * packet references bo + offset (SS6_INDIRECT) rather than inlining the
+ * values into the cmdstream.
+ */
+void
+fd6_emit_const_bo(struct fd_ringbuffer *ring,
+ const struct ir3_shader_variant *v, uint32_t regid,
+ uint32_t offset, uint32_t sizedwords, struct fd_bo *bo)
{
- uint32_t anum = align(num, 2);
- uint32_t i;
-
- debug_assert((regid % 4) == 0);
-
- OUT_PKT7(ring, fd6_stage2opcode(type), 3 + (2 * anum));
- OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(regid/4) |
- CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS)|
- CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
- CP_LOAD_STATE6_0_STATE_BLOCK(fd6_stage2shadersb(type)) |
- CP_LOAD_STATE6_0_NUM_UNIT(anum/2));
- OUT_RING(ring, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
- OUT_RING(ring, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
-
- for (i = 0; i < num; i++) {
- if (prscs[i]) {
- if (write) {
- OUT_RELOCW(ring, fd_resource(prscs[i])->bo, offsets[i], 0, 0);
- } else {
- OUT_RELOC(ring, fd_resource(prscs[i])->bo, offsets[i], 0, 0);
- }
- } else {
- OUT_RING(ring, 0xbad00000 | (i << 16));
- OUT_RING(ring, 0xbad00000 | (i << 16));
- }
- }
-
- for (; i < anum; i++) {
- OUT_RING(ring, 0xffffffff);
- OUT_RING(ring, 0xffffffff);
+ emit_const_asserts(ring, v, regid, sizedwords);
+
+ /* geometry-pipeline stages use the _GEOM packet, other stages _FRAG */
+ if (fd6_geom_stage(v->type)) {
+ OUT_PKT(ring, CP_LOAD_STATE6_GEOM,
+ CP_LOAD_STATE6_0(
+ .dst_off = regid/4,
+ .state_type = ST6_CONSTANTS,
+ .state_src = SS6_INDIRECT,
+ .state_block = fd6_stage2shadersb(v->type),
+ .num_unit = DIV_ROUND_UP(sizedwords, 4)
+ ),
+ CP_LOAD_STATE6_EXT_SRC_ADDR(
+ .bo = bo,
+ .bo_offset = offset
+ )
+ );
+ } else {
+ OUT_PKT(ring, CP_LOAD_STATE6_FRAG,
+ CP_LOAD_STATE6_0(
+ .dst_off = regid/4,
+ .state_type = ST6_CONSTANTS,
+ .state_src = SS6_INDIRECT,
+ .state_block = fd6_stage2shadersb(v->type),
+ .num_unit = DIV_ROUND_UP(sizedwords, 4)
+ ),
+ CP_LOAD_STATE6_EXT_SRC_ADDR(
+ .bo = bo,
+ .bo_offset = offset
+ )
+ );
+ }
}
return true;
}
-void
-emit_const(struct fd_ringbuffer *ring,
- const struct ir3_shader_variant *v, uint32_t dst_offset,
- uint32_t offset, uint32_t size, const void *user_buffer,
- struct pipe_resource *buffer)
-{
- /* TODO inline this */
- assert(dst_offset + size <= v->constlen * 4);
- fd6_emit_const(ring, v->type, dst_offset,
- offset, size, user_buffer, buffer);
-}
-
+/* ir3_const.h hook for emitting an array of buffer pointers; on a6xx the
+ * UBO pointer table is emitted via fd6_emit_ubos() instead, so this path
+ * must never be reached.
+ */
static void
-emit_const_bo(struct fd_ringbuffer *ring,
- const struct ir3_shader_variant *v, bool write, uint32_t dst_offset,
+emit_const_ptrs(struct fd_ringbuffer *ring,
+ const struct ir3_shader_variant *v, uint32_t dst_offset,
uint32_t num, struct pipe_resource **prscs, uint32_t *offsets)
{
- /* TODO inline this */
- assert(dst_offset + num < v->constlen * 4);
- fd6_emit_const_bo(ring, v->type, write, dst_offset, num, prscs, offsets);
+ unreachable("shouldn't be called on a6xx");
}
static void
emit_tess_bos(struct fd_ringbuffer *ring, struct fd6_emit *emit, struct ir3_shader_variant *s)
{
struct fd_context *ctx = emit->ctx;
- const unsigned regid = s->shader->const_state.offsets.primitive_param * 4 + 4;
+ const struct ir3_const_state *const_state = ir3_const_state(s);
+ const unsigned regid = const_state->offsets.primitive_param * 4 + 4;
uint32_t dwords = 16;
OUT_PKT7(ring, fd6_stage2opcode(s->type), 3);
emit_stage_tess_consts(struct fd_ringbuffer *ring, struct ir3_shader_variant *v,
uint32_t *params, int num_params)
{
- const unsigned regid = v->shader->const_state.offsets.primitive_param;
+ const struct ir3_const_state *const_state = ir3_const_state(v);
+ const unsigned regid = const_state->offsets.primitive_param;
int size = MIN2(1 + regid, v->constlen) - regid;
if (size > 0)
- fd6_emit_const(ring, v->type, regid * 4, 0, num_params, params, NULL);
+ fd6_emit_const_user(ring, v, regid * 4, num_params, params);
}
static void
emit->gs->shader->nir->info.gs.vertices_in;
uint32_t vs_params[4] = {
- emit->vs->shader->output_size * num_vertices * 4, /* vs primitive stride */
- emit->vs->shader->output_size * 4, /* vs vertex stride */
+ emit->vs->output_size * num_vertices * 4, /* vs primitive stride */
+ emit->vs->output_size * 4, /* vs vertex stride */
0,
0
};
if (emit->hs) {
uint32_t hs_params[4] = {
- emit->vs->shader->output_size * num_vertices * 4, /* vs primitive stride */
- emit->vs->shader->output_size * 4, /* vs vertex stride */
- emit->hs->shader->output_size,
+ emit->vs->output_size * num_vertices * 4, /* vs primitive stride */
+ emit->vs->output_size * 4, /* vs vertex stride */
+ emit->hs->output_size,
emit->info->vertices_per_patch
};
num_vertices = emit->gs->shader->nir->info.gs.vertices_in;
uint32_t ds_params[4] = {
- emit->ds->shader->output_size * num_vertices * 4, /* ds primitive stride */
- emit->ds->shader->output_size * 4, /* ds vertex stride */
- emit->hs->shader->output_size, /* hs vertex stride (dwords) */
+ emit->ds->output_size * num_vertices * 4, /* ds primitive stride */
+ emit->ds->output_size * 4, /* ds vertex stride */
+ emit->hs->output_size, /* hs vertex stride (dwords) */
emit->hs->shader->nir->info.tess.tcs_vertices_out
};
prev = emit->vs;
uint32_t gs_params[4] = {
- prev->shader->output_size * num_vertices * 4, /* ds primitive stride */
- prev->shader->output_size * 4, /* ds vertex stride */
+ prev->output_size * num_vertices * 4, /* ds primitive stride */
+ prev->output_size * 4, /* ds vertex stride */
0,
0,
};
fd6_emit_take_group(emit, constobj, FD6_GROUP_PRIMITIVE_PARAMS, ENABLE_ALL);
}
+/* Emit the ST6_UBO state table (base address + size per UBO) for all UBOs
+ * referenced by the shader variant.
+ */
+static void
+fd6_emit_ubos(struct fd_context *ctx, const struct ir3_shader_variant *v,
+ struct fd_ringbuffer *ring, struct fd_constbuf_stateobj *constbuf)
+{
+ const struct ir3_const_state *const_state = ir3_const_state(v);
+ int num_ubos = const_state->num_ubos;
+
+ if (!num_ubos)
+ return;
+
+ OUT_PKT7(ring, fd6_stage2opcode(v->type), 3 + (2 * num_ubos));
+ OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(0) |
+ CP_LOAD_STATE6_0_STATE_TYPE(ST6_UBO)|
+ CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
+ CP_LOAD_STATE6_0_STATE_BLOCK(fd6_stage2shadersb(v->type)) |
+ CP_LOAD_STATE6_0_NUM_UNIT(num_ubos));
+ OUT_RING(ring, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
+ OUT_RING(ring, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
+
+ for (int i = 0; i < num_ubos; i++) {
+ struct pipe_constant_buffer *cb = &constbuf->cb[i];
+
+ /* If we have user pointers (constbuf 0, aka GL uniforms), upload them
+ * to a buffer now, and save it in the constbuf so that we don't have
+ * to reupload until they get changed.
+ */
+ if (cb->user_buffer) {
+ struct pipe_context *pctx = &ctx->base;
+ u_upload_data(pctx->stream_uploader, 0,
+ cb->buffer_size,
+ 64,
+ cb->user_buffer,
+ &cb->buffer_offset, &cb->buffer);
+ cb->user_buffer = NULL;
+ }
+
+ if (cb->buffer) {
+ /* UBO size is encoded in vec4 units in the upper dword of the addr */
+ int size_vec4s = DIV_ROUND_UP(cb->buffer_size, 16);
+ OUT_RELOC(ring, fd_resource(cb->buffer)->bo,
+ cb->buffer_offset,
+ (uint64_t)A6XX_UBO_1_SIZE(size_vec4s) << 32,
+ 0);
+ } else {
+ /* no buffer bound at this slot: emit an obviously-invalid marker */
+ OUT_RING(ring, 0xbad00000 | (i << 16));
+ OUT_RING(ring, A6XX_UBO_1_SIZE(0));
+ }
+ }
+}
+
+/* Return the cmdstream size (in bytes, note the sizedwords * 4) needed for
+ * this variant's user consts plus UBO address table; computed once and
+ * cached in ubo_state->cmdstream_size.
+ */
+static unsigned
+user_consts_cmdstream_size(struct ir3_shader_variant *v)
+{
+ struct ir3_const_state *const_state = ir3_const_state(v);
+ struct ir3_ubo_analysis_state *ubo_state = &const_state->ubo_state;
+
+ if (unlikely(!ubo_state->cmdstream_size)) {
+ unsigned packets, size;
+
+ /* pre-calculate size required for userconst stateobj: */
+ ir3_user_consts_size(ubo_state, &packets, &size);
+
+ /* also account for UBO addresses: */
+ packets += 1;
+ size += 2 * const_state->num_ubos;
+
+ unsigned sizedwords = (4 * packets) + size;
+ ubo_state->cmdstream_size = sizedwords * 4;
+ }
+
+ return ubo_state->cmdstream_size;
+}
+
static void
emit_user_consts(struct fd6_emit *emit)
{
PIPE_SHADER_VERTEX, PIPE_SHADER_TESS_CTRL, PIPE_SHADER_TESS_EVAL,
PIPE_SHADER_GEOMETRY, PIPE_SHADER_FRAGMENT,
};
- const struct ir3_shader_variant *variants[] = {
+ struct ir3_shader_variant *variants[] = {
emit->vs, emit->hs, emit->ds, emit->gs, emit->fs,
};
struct fd_context *ctx = emit->ctx;
for (unsigned i = 0; i < ARRAY_SIZE(types); i++) {
if (!variants[i])
continue;
- sz += variants[i]->shader->ubo_state.cmdstream_size;
+ sz += user_consts_cmdstream_size(variants[i]);
}
struct fd_ringbuffer *constobj = fd_submit_new_ringbuffer(
if (!variants[i])
continue;
ir3_emit_user_consts(ctx->screen, variants[i], constobj, &ctx->constbuf[types[i]]);
- ir3_emit_ubos(ctx->screen, variants[i], constobj, &ctx->constbuf[types[i]]);
+ fd6_emit_ubos(ctx, variants[i], constobj, &ctx->constbuf[types[i]]);
}
fd6_emit_take_group(emit, constobj, FD6_GROUP_CONST, ENABLE_ALL);
struct fd_context *ctx, const struct pipe_grid_info *info)
{
ir3_emit_cs_consts(v, ring, ctx, info);
+ fd6_emit_ubos(ctx, v, ring, &ctx->constbuf[PIPE_SHADER_COMPUTE]);
}
void
ir3_emit_immediates(screen, v, ring);
}
-void
-fd6_user_consts_size(struct ir3_ubo_analysis_state *state,
- unsigned *packets, unsigned *size)
-{
- ir3_user_consts_size(state, packets, size);
-}
-
void
fd6_emit_link_map(struct fd_screen *screen,
const struct ir3_shader_variant *producer,