fd3_emit_const_bo(struct fd_ringbuffer *ring, enum shader_t type, boolean write,
uint32_t regid, uint32_t num, struct pipe_resource **prscs, uint32_t *offsets)
{
+ uint32_t anum = align(num, 4);
uint32_t i;
debug_assert((regid % 4) == 0);
- debug_assert((num % 4) == 0);
- OUT_PKT3(ring, CP_LOAD_STATE, 2 + num);
+ OUT_PKT3(ring, CP_LOAD_STATE, 2 + anum);
OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(regid/2) |
CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
CP_LOAD_STATE_0_STATE_BLOCK(sb[type]) |
- CP_LOAD_STATE_0_NUM_UNIT(num/2));
+ CP_LOAD_STATE_0_NUM_UNIT(anum/2));
OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) |
CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS));
OUT_RING(ring, 0xbad00000 | (i << 16));
}
}
+
+ for (; i < anum; i++)
+ OUT_RING(ring, 0xffffffff);
}
#define VERT_TEX_OFF 0
fd4_emit_const_bo(struct fd_ringbuffer *ring, enum shader_t type, boolean write,
uint32_t regid, uint32_t num, struct pipe_resource **prscs, uint32_t *offsets)
{
+ uint32_t anum = align(num, 4);
uint32_t i;
debug_assert((regid % 4) == 0);
- debug_assert((num % 4) == 0);
- OUT_PKT3(ring, CP_LOAD_STATE, 2 + num);
+ OUT_PKT3(ring, CP_LOAD_STATE, 2 + anum);
OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(regid/4) |
CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
CP_LOAD_STATE_0_STATE_BLOCK(sb[type]) |
- CP_LOAD_STATE_0_NUM_UNIT(num/4));
+ CP_LOAD_STATE_0_NUM_UNIT(anum/4));
OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) |
CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS));
OUT_RING(ring, 0xbad00000 | (i << 16));
}
}
+
+ for (; i < anum; i++)
+ OUT_RING(ring, 0xffffffff);
}
static void
bool error;
};
+/* gpu pointer size in units of 32bit registers/slots: 2 when the
+ * compiler's gpu_id is >= 500, otherwise 1 */
+static unsigned pointer_size(struct ir3_compile *ctx)
+{
+ return (ctx->compiler->gpu_id >= 500) ? 2 : 1;
+}
static struct ir3_instruction * create_immed(struct ir3_block *block, uint32_t val);
static struct ir3_block * get_block(struct ir3_compile *ctx, nir_block *nblock);
nir_print_shader(ctx->s, stdout);
}
- so->first_driver_param = so->first_immediate = align(ctx->s->num_uniforms, 4);
+ so->num_uniforms = ctx->s->num_uniforms;
+ so->num_ubos = ctx->s->info->num_ubos;
- /* Layout of constant registers:
+ /* Layout of constant registers, each section aligned to vec4. Note
+ * that pointer size (ubo, etc) changes depending on generation.
*
- * num_uniform * vec4 - user consts
- * 4 * vec4 - UBO addresses
+ * user consts
+ * UBO addresses
* if (vertex shader) {
- * N * vec4 - driver params (IR3_DP_*)
- * 1 * vec4 - stream-out addresses
+ * driver params (IR3_DP_*)
+ * if (stream_output.num_outputs > 0)
+ * stream-out addresses
* }
+ * immediates
*
- * TODO this could be made more dynamic, to at least skip sections
- * that we don't need..
+ * Immediates go last mostly because they are inserted in the CP pass
+ * after the nir -> ir3 frontend.
*/
+ unsigned constoff = align(ctx->s->num_uniforms, 4);
+ unsigned ptrsz = pointer_size(ctx);
- /* reserve 4 (vec4) slots for ubo base addresses: */
- so->first_immediate += 4;
+ memset(&so->constbase, ~0, sizeof(so->constbase));
+
+ if (so->num_ubos > 0) {
+ so->constbase.ubo = constoff;
+ constoff += align(ctx->s->info->num_ubos * ptrsz, 4) / 4;
+ }
if (so->type == SHADER_VERTEX) {
- /* driver params (see ir3_driver_param): */
- so->first_immediate += IR3_DP_COUNT/4; /* convert to vec4 */
- /* one (vec4) slot for stream-output base addresses: */
- so->first_immediate++;
+ so->constbase.driver_param = constoff;
+ constoff += align(IR3_DP_COUNT, 4) / 4;
+
+ if (so->shader->stream_output.num_outputs > 0) {
+ so->constbase.tfbo = constoff;
+ constoff += align(PIPE_MAX_SO_BUFFERS * ptrsz, 4) / 4;
+ }
}
+ so->constbase.immediate = constoff;
+
return ctx;
}
{
/* first four vec4 sysval's reserved for UBOs: */
/* NOTE: dp is in scalar, but there can be >4 dp components: */
- unsigned n = ctx->so->first_driver_param + IR3_DRIVER_PARAM_OFF;
+ unsigned n = ctx->so->constbase.driver_param;
unsigned r = regid(n + dp / 4, dp % 4);
return create_uniform(ctx, r);
}
struct ir3_instruction *addr, *src0, *src1;
nir_const_value *const_offset;
/* UBO addresses are the first driver params: */
- unsigned ubo = regid(ctx->so->first_driver_param + IR3_UBOS_OFF, 0);
+ unsigned ubo = regid(ctx->so->constbase.ubo, 0);
int off = 0;
/* First src is ubo index, which could either be an immed or not: */
unsigned stride = strmout->stride[i];
struct ir3_instruction *base, *off;
- base = create_uniform(ctx, regid(v->first_driver_param + IR3_TFBOS_OFF, i));
+ base = create_uniform(ctx, regid(v->constbase.tfbo, i));
/* 24-bit should be enough: */
off = ir3_MUL_U(ctx->block, vtxcnt, 0,
new_flags &= ~IR3_REG_IMMED;
new_flags |= IR3_REG_CONST;
reg->flags = new_flags;
- reg->num = i + (4 * ctx->so->first_immediate);
+ reg->num = i + (4 * ctx->so->constbase.immediate);
return reg;
}
}
for (i = 0; i < so->immediates_count; i++) {
- debug_printf("@const(c%d.x)\t", so->first_immediate + i);
+ debug_printf("@const(c%d.x)\t", so->constbase.immediate + i);
debug_printf("0x%08x, 0x%08x, 0x%08x, 0x%08x\n",
so->immediates[i].val[0],
so->immediates[i].val[1],
* the user consts early to avoid HLSQ lockup caused by
* writing too many consts
*/
- uint32_t max_const = MIN2(v->first_driver_param, v->constlen);
+ uint32_t max_const = MIN2(v->num_uniforms, v->constlen);
// I expect that size should be a multiple of vec4's:
assert(size == align(size, 4));
emit_ubos(struct fd_context *ctx, const struct ir3_shader_variant *v,
struct fd_ringbuffer *ring, struct fd_constbuf_stateobj *constbuf)
{
- uint32_t offset = v->first_driver_param + IR3_UBOS_OFF;
+ uint32_t offset = v->constbase.ubo;
if (v->constlen > offset) {
- uint32_t params = MIN2(4, v->constlen - offset) * 4;
+ uint32_t params = v->num_ubos;
uint32_t offsets[params];
struct pipe_resource *prscs[params];
struct fd_ringbuffer *ring)
{
int size = v->immediates_count;
- uint32_t base = v->first_immediate;
+ uint32_t base = v->constbase.immediate;
/* truncate size to avoid writing constants that shader
* does not use:
struct fd_ringbuffer *ring)
{
/* streamout addresses after driver-params: */
- uint32_t offset = v->first_driver_param + IR3_TFBOS_OFF;
+ uint32_t offset = v->constbase.tfbo;
if (v->constlen > offset) {
struct fd_streamout_stateobj *so = &ctx->streamout;
struct pipe_stream_output_info *info = &v->shader->stream_output;
/* emit driver params every time: */
/* TODO skip emit if shader doesn't use driver params to avoid WFI.. */
if (info && (v->type == SHADER_VERTEX)) {
- uint32_t offset = v->first_driver_param + IR3_DRIVER_PARAM_OFF;
- if (v->constlen >= offset) {
+ uint32_t offset = v->constbase.driver_param;
+ if (v->constlen > offset) {
uint32_t vertex_params[IR3_DP_COUNT] = {
[IR3_DP_VTXID_BASE] = info->indexed ?
info->index_bias : info->start,
IR3_DP_COUNT = 36 /* must be aligned to vec4 */
};
-/* Layout of constant registers:
- *
- * num_uniform * vec4 - user consts
- * 4 * vec4 - UBO addresses
- * if (vertex shader) {
- * N * vec4 - driver params (IR3_DP_*)
- * 1 * vec4 - stream-out addresses
- * }
- *
- * TODO this could be made more dynamic, to at least skip sections
- * that we don't need..
- */
-#define IR3_UBOS_OFF 0 /* UBOs after user consts */
-#define IR3_DRIVER_PARAM_OFF 4 /* driver params after UBOs */
-#define IR3_TFBOS_OFF (IR3_DRIVER_PARAM_OFF + IR3_DP_COUNT/4)
-
/* Configuration key used to identify a shader variant.. different
* shader variants can be used to implement features not supported
* in hw (two sided color), binning-pass vertex shader, etc.
*/
unsigned constlen;
+ /* number of uniforms (in vec4), not including built-in compiler
+ * constants, etc.
+ */
+ unsigned num_uniforms;
+ unsigned num_ubos;
+
/* About Linkage:
* + Let the frag shader determine the position/compmask for the
* varyings, since it is the place where we know if the varying
/* do we have kill instructions: */
bool has_kill;
- /* const reg # of first immediate, ie. 1 == c1
- * (not regid, because TGSI thinks in terms of vec4 registers,
- * not scalar registers)
+ /* Layout of constant registers, start of each section (in vec4). Pointer
+ * size is 32b (a3xx, a4xx), or 64b (a5xx+), which affects the size of the
+ * UBO and stream-out consts.
*/
- unsigned first_driver_param;
- unsigned first_immediate;
+ struct {
+ /* user const start at zero */
+ unsigned ubo;
+ unsigned driver_param;
+ unsigned tfbo;
+ unsigned immediate;
+ } constbase;
+
unsigned immediates_count;
struct {
uint32_t val[4];