const struct tgsi_token *tokens;
bool free_tokens;
struct ir3_shader *ir;
- struct fd3_shader_stateobj *so;
+ struct fd3_shader_variant *so;
struct ir3_block *block;
struct ir3_instruction *current_instr;
unsigned num_internal_temps;
struct tgsi_src_register internal_temps[6];
- /* inputs start at r0, temporaries start after last input, and
- * outputs start after last temporary.
- *
- * We could be more clever, because this is not a hw restriction,
- * but probably best just to implement an optimizing pass to
- * reduce the # of registers used and get rid of redundant mov's
- * (to output register).
- */
- unsigned base_reg[TGSI_FILE_COUNT];
-
/* idx/slot for last compiler generated immediate */
unsigned immediate_idx;
static type_t get_ftype(struct fd3_compile_context *ctx);
static unsigned
-compile_init(struct fd3_compile_context *ctx, struct fd3_shader_stateobj *so,
+compile_init(struct fd3_compile_context *ctx, struct fd3_shader_variant *so,
const struct tgsi_token *tokens)
{
- unsigned ret, base = 0;
+ unsigned ret;
struct tgsi_shader_info *info = &ctx->info;
const struct fd_lowering_config lconfig = {
.lower_DST = true,
ctx->num_output_updates = 0;
ctx->atomic = false;
- memset(ctx->base_reg, 0, sizeof(ctx->base_reg));
-
#define FM(x) (1 << TGSI_FILE_##x)
/* optimize can't deal with relative addressing: */
if (info->indirect_files & (FM(TEMPORARY) | FM(INPUT) |
return TGSI_PARSE_ERROR;
/* Immediates go after constants: */
- ctx->base_reg[TGSI_FILE_CONSTANT] = 0;
- ctx->base_reg[TGSI_FILE_IMMEDIATE] =
- info->file_max[TGSI_FILE_CONSTANT] + 1;
-
- /* if full precision and fragment shader, don't clobber
- * r0.xy w/ bary fetch:
- */
- if ((so->type == SHADER_FRAGMENT) && !so->half_precision)
- base = 1;
-
- /* Temporaries after outputs after inputs: */
- ctx->base_reg[TGSI_FILE_INPUT] = base;
- ctx->base_reg[TGSI_FILE_OUTPUT] = base +
- info->file_max[TGSI_FILE_INPUT] + 1;
- ctx->base_reg[TGSI_FILE_TEMPORARY] = base +
- info->file_max[TGSI_FILE_INPUT] + 1 +
- info->file_max[TGSI_FILE_OUTPUT] + 1;
-
- so->first_immediate = ctx->base_reg[TGSI_FILE_IMMEDIATE];
+ so->first_immediate = info->file_max[TGSI_FILE_CONSTANT] + 1;
ctx->immediate_idx = 4 * (ctx->info.file_max[TGSI_FILE_IMMEDIATE] + 1);
ret = tgsi_parse_init(&ctx->parser, ctx->tokens);
switch (dst->File) {
case TGSI_FILE_OUTPUT:
case TGSI_FILE_TEMPORARY:
- num = dst->Index + ctx->base_reg[dst->File];
+ /* uses SSA */
break;
case TGSI_FILE_ADDRESS:
num = REG_A0;
if (dst->Indirect)
flags |= IR3_REG_RELATIV;
- if (ctx->so->half_precision)
- flags |= IR3_REG_HALF;
reg = ir3_reg_create(instr, regid(num, chan), flags);
* TGSI has vec4 immediates, we can only embed scalar (of limited
* size, depending on instruction..)
*/
+ flags |= IR3_REG_CONST;
+ num = src->Index + ctx->so->first_immediate;
+ break;
case TGSI_FILE_CONSTANT:
flags |= IR3_REG_CONST;
- num = src->Index + ctx->base_reg[src->File];
+ num = src->Index;
break;
case TGSI_FILE_OUTPUT:
/* NOTE: we should only end up w/ OUTPUT file for things like
*/
case TGSI_FILE_INPUT:
case TGSI_FILE_TEMPORARY:
- num = src->Index + ctx->base_reg[src->File];
+ /* uses SSA */
break;
default:
compile_error(ctx, "unsupported src register file: %s\n",
flags |= IR3_REG_NEGATE;
if (src->Indirect)
flags |= IR3_REG_RELATIV;
- if (ctx->so->half_precision)
- flags |= IR3_REG_HALF;
reg = ir3_reg_create(instr, regid(num, chan), flags);
struct tgsi_src_register *tmp_src;
int n;
- if (ctx->so->half_precision)
- return get_internal_temp(ctx, tmp_dst);
-
tmp_dst->File = TGSI_FILE_TEMPORARY;
tmp_dst->WriteMask = TGSI_WRITEMASK_XYZW;
tmp_dst->Indirect = 0;
static type_t
get_ftype(struct fd3_compile_context *ctx)
{
- return ctx->so->half_precision ? TYPE_F16 : TYPE_F32;
+ return TYPE_F32;
}
static type_t
get_utype(struct fd3_compile_context *ctx)
{
- return ctx->so->half_precision ? TYPE_U16 : TYPE_U32;
+ return TYPE_U32;
}
static unsigned
a1 = &inst->Src[1].Register;
a2 = &inst->Src[2].Register;
/* sel.{b32,b16} dst, src2, tmp, src1 */
- instr = instr_create(ctx, 3,
- ctx->so->half_precision ? OPC_SEL_B16 : OPC_SEL_B32);
+ instr = instr_create(ctx, 3, OPC_SEL_B32);
vectorize(ctx, instr, dst, 3, a1, 0, tmp_src, 0, a2, 0);
break;
}
}
- instr = instr_create(ctx, 3,
- ctx->so->half_precision ? t->hopc : t->opc);
+ instr = instr_create(ctx, 3, t->opc);
vectorize(ctx, instr, dst, 3, src0, 0, src1, 0,
&inst->Src[2].Register, 0);
put_dst(ctx, inst, dst);
static void
decl_in(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl)
{
- struct fd3_shader_stateobj *so = ctx->so;
- unsigned base = ctx->base_reg[TGSI_FILE_INPUT];
+ struct fd3_shader_variant *so = ctx->so;
unsigned i, flags = 0;
/* I don't think we should get frag shader input without
compile_assert(ctx, (ctx->type == TGSI_PROCESSOR_VERTEX) ||
decl->Declaration.Semantic);
- if (ctx->so->half_precision)
- flags |= IR3_REG_HALF;
-
for (i = decl->Range.First; i <= decl->Range.Last; i++) {
unsigned n = so->inputs_count++;
- unsigned r = regid(i + base, 0);
+ unsigned r = regid(i, 0);
unsigned ncomp, j;
/* TODO use ctx->info.input_usage_mask[decl->Range.n] to figure out ncomp: */
ncomp = 4;
- DBG("decl in -> r%d", i + base);
+ DBG("decl in -> r%d", i);
so->inputs[n].semantic = decl_semantic(&decl->Semantic);
so->inputs[n].compmask = (1 << ncomp) - 1;
static void
decl_out(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl)
{
- struct fd3_shader_stateobj *so = ctx->so;
- unsigned base = ctx->base_reg[TGSI_FILE_OUTPUT];
+ struct fd3_shader_variant *so = ctx->so;
unsigned comp = 0;
unsigned name = decl->Semantic.Name;
unsigned i;
compile_assert(ctx, decl->Declaration.Semantic);
- DBG("decl out[%d] -> r%d", name, decl->Range.First + base);
+ DBG("decl out[%d] -> r%d", name, decl->Range.First);
if (ctx->type == TGSI_PROCESSOR_VERTEX) {
switch (name) {
ncomp = 4;
so->outputs[n].semantic = decl_semantic(&decl->Semantic);
- so->outputs[n].regid = regid(i + base, comp);
+ so->outputs[n].regid = regid(i, comp);
/* avoid undefined outputs, stick a dummy mov from imm{0.0},
* which if the output is actually assigned will be over-
}
int
-fd3_compile_shader(struct fd3_shader_stateobj *so,
- const struct tgsi_token *tokens)
+fd3_compile_shader(struct fd3_shader_variant *so,
+ const struct tgsi_token *tokens, struct fd3_shader_key key)
{
struct fd3_compile_context ctx;
unsigned i, actual_in;
#include "fd3_util.h"
-int fd3_compile_shader(struct fd3_shader_stateobj *so,
- const struct tgsi_token *tokens);
-int fd3_compile_shader_old(struct fd3_shader_stateobj *so,
- const struct tgsi_token *tokens);
+int fd3_compile_shader(struct fd3_shader_variant *so,
+ const struct tgsi_token *tokens,
+ struct fd3_shader_key key);
+int fd3_compile_shader_old(struct fd3_shader_variant *so,
+ const struct tgsi_token *tokens,
+ struct fd3_shader_key key);
#endif /* FD3_COMPILER_H_ */
bool free_tokens;
struct ir3_shader *ir;
struct ir3_block *block;
- struct fd3_shader_stateobj *so;
+ struct fd3_shader_variant *so;
struct tgsi_parse_context parser;
unsigned type;
struct tgsi_dst_register *dst, struct tgsi_src_register *src);
static unsigned
-compile_init(struct fd3_compile_context *ctx, struct fd3_shader_stateobj *so,
+compile_init(struct fd3_compile_context *ctx, struct fd3_shader_variant *so,
const struct tgsi_token *tokens)
{
unsigned ret, base = 0;
/* if full precision and fragment shader, don't clobber
* r0.x w/ bary fetch:
*/
- if ((so->type == SHADER_FRAGMENT) && !so->half_precision)
+ if ((so->type == SHADER_FRAGMENT) && !so->key.half_precision)
base = 1;
/* Temporaries after outputs after inputs: */
if (dst->Indirect)
flags |= IR3_REG_RELATIV;
- if (ctx->so->half_precision)
+ if (ctx->so->key.half_precision)
flags |= IR3_REG_HALF;
reg = ir3_reg_create(instr, regid(num, chan), flags);
flags |= IR3_REG_NEGATE;
if (src->Indirect)
flags |= IR3_REG_RELATIV;
- if (ctx->so->half_precision)
+ if (ctx->so->key.half_precision)
flags |= IR3_REG_HALF;
reg = ir3_reg_create(instr, regid(num, chan), flags);
struct tgsi_src_register *tmp_src;
int n;
- if (ctx->so->half_precision)
+ if (ctx->so->key.half_precision)
return get_internal_temp(ctx, tmp_dst);
tmp_dst->File = TGSI_FILE_TEMPORARY;
static type_t
get_ftype(struct fd3_compile_context *ctx)
{
- return ctx->so->half_precision ? TYPE_F16 : TYPE_F32;
+ return ctx->so->key.half_precision ? TYPE_F16 : TYPE_F32;
}
static type_t
get_utype(struct fd3_compile_context *ctx)
{
- return ctx->so->half_precision ? TYPE_U16 : TYPE_U32;
+ return ctx->so->key.half_precision ? TYPE_U16 : TYPE_U32;
}
static unsigned
if (t->tgsi_opc == TGSI_OPCODE_CMP) {
/* sel.{f32,f16} dst, src2, tmp, src1 */
instr = instr_create(ctx, 3,
- ctx->so->half_precision ? OPC_SEL_F16 : OPC_SEL_F32);
+ ctx->so->key.half_precision ? OPC_SEL_F16 : OPC_SEL_F32);
vectorize(ctx, instr, dst, 3,
&inst->Src[2].Register, 0,
tmp_src, 0,
get_immediate(ctx, &constval1, fui(1.0));
/* sel.{f32,f16} dst, {0.0}, tmp0, {1.0} */
instr = instr_create(ctx, 3,
- ctx->so->half_precision ? OPC_SEL_F16 : OPC_SEL_F32);
+ ctx->so->key.half_precision ? OPC_SEL_F16 : OPC_SEL_F32);
vectorize(ctx, instr, dst, 3,
&constval0, 0, tmp_src, 0, &constval1, 0);
}
}
instr = instr_create(ctx, 3,
- ctx->so->half_precision ? t->hopc : t->opc);
+ ctx->so->key.half_precision ? t->hopc : t->opc);
vectorize(ctx, instr, dst, 3, src0, 0, src1, 0,
&inst->Src[2].Register, 0);
put_dst(ctx, inst, dst);
static int
decl_in(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl)
{
- struct fd3_shader_stateobj *so = ctx->so;
+ struct fd3_shader_variant *so = ctx->so;
unsigned base = ctx->base_reg[TGSI_FILE_INPUT];
unsigned i, flags = 0;
int nop = 0;
compile_assert(ctx, (ctx->type == TGSI_PROCESSOR_VERTEX) ||
decl->Declaration.Semantic);
- if (ctx->so->half_precision)
+ if (ctx->so->key.half_precision)
flags |= IR3_REG_HALF;
for (i = decl->Range.First; i <= decl->Range.Last; i++) {
static void
decl_out(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl)
{
- struct fd3_shader_stateobj *so = ctx->so;
+ struct fd3_shader_variant *so = ctx->so;
unsigned base = ctx->base_reg[TGSI_FILE_OUTPUT];
unsigned comp = 0;
unsigned name = decl->Semantic.Name;
}
int
-fd3_compile_shader_old(struct fd3_shader_stateobj *so,
- const struct tgsi_token *tokens)
+fd3_compile_shader_old(struct fd3_shader_variant *so,
+ const struct tgsi_token *tokens, struct fd3_shader_key key)
{
struct fd3_compile_context ctx;
static void
-emit_vertexbufs(struct fd_context *ctx, struct fd_ringbuffer *ring)
+emit_vertexbufs(struct fd_context *ctx, struct fd_ringbuffer *ring,
+ struct fd3_shader_key key)
{
struct fd_vertex_stateobj *vtx = ctx->vtx;
struct fd_vertexbuf_stateobj *vertexbuf = &ctx->vertexbuf;
bufs[i].format = elem->src_format;
}
- fd3_emit_vertex_bufs(ring, &ctx->prog, bufs, vtx->num_elements);
+ fd3_emit_vertex_bufs(ring, fd3_shader_variant(ctx->prog.vp, key),
+ bufs, vtx->num_elements);
}
static void
draw_impl(struct fd_context *ctx, const struct pipe_draw_info *info,
- struct fd_ringbuffer *ring, unsigned dirty, bool binning)
+ struct fd_ringbuffer *ring, unsigned dirty, struct fd3_shader_key key)
{
- fd3_emit_state(ctx, ring, &ctx->prog, dirty, binning);
+ fd3_emit_state(ctx, ring, &ctx->prog, dirty, key);
if (dirty & FD_DIRTY_VTXBUF)
- emit_vertexbufs(ctx, ring);
+ emit_vertexbufs(ctx, ring, key);
OUT_PKT0(ring, REG_A3XX_PC_VERTEX_REUSE_BLOCK_CNTL, 1);
- OUT_RING(ring, 0x0000000b); /* PC_VERTEX_REUSE_BLOCK_CNTL */
+ OUT_RING(ring, 0x0000000b); /* PC_VERTEX_REUSE_BLOCK_CNTL */
OUT_PKT0(ring, REG_A3XX_VFD_INDEX_MIN, 4);
OUT_RING(ring, info->min_index); /* VFD_INDEX_MIN */
OUT_RING(ring, info->primitive_restart ? /* PC_RESTART_INDEX */
info->restart_index : 0xffffffff);
- fd_draw_emit(ctx, ring, binning ? IGNORE_VISIBILITY : USE_VISIBILITY, info);
+ fd_draw_emit(ctx, ring,
+ key.binning_pass ? IGNORE_VISIBILITY : USE_VISIBILITY,
+ info);
}
static void
fd3_draw(struct fd_context *ctx, const struct pipe_draw_info *info)
{
unsigned dirty = ctx->dirty;
+ struct fd3_shader_key key = {
+ /* do binning pass first: */
+ .binning_pass = true,
+ .color_two_side = ctx->rasterizer ? ctx->rasterizer->light_twoside : false,
+ };
draw_impl(ctx, info, ctx->binning_ring,
- dirty & ~(FD_DIRTY_BLEND), true);
- draw_impl(ctx, info, ctx->ring, dirty, false);
+ dirty & ~(FD_DIRTY_BLEND), key);
+ /* and now regular (non-binning) pass: */
+ key.binning_pass = false;
+ draw_impl(ctx, info, ctx->ring, dirty, key);
}
/* binning pass cmds for a clear:
{
struct fd3_context *fd3_ctx = fd3_context(ctx);
struct fd_ringbuffer *ring = ctx->binning_ring;
+ struct fd3_shader_key key = {
+ .binning_pass = true,
+ };
- fd3_emit_state(ctx, ring, &ctx->solid_prog, dirty, true);
+ fd3_emit_state(ctx, ring, &ctx->solid_prog, dirty, key);
- fd3_emit_vertex_bufs(ring, &ctx->solid_prog, (struct fd3_vertex_buf[]) {
- { .prsc = fd3_ctx->solid_vbuf, .stride = 12, .format = PIPE_FORMAT_R32G32B32_FLOAT },
- }, 1);
+ fd3_emit_vertex_bufs(ring, fd3_shader_variant(ctx->solid_prog.vp, key),
+ (struct fd3_vertex_buf[]) {{
+ .prsc = fd3_ctx->solid_vbuf,
+ .stride = 12,
+ .format = PIPE_FORMAT_R32G32B32_FLOAT,
+ }}, 1);
OUT_PKT0(ring, REG_A3XX_PC_PRIM_VTX_CNTL, 1);
OUT_RING(ring, A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC(0) |
struct fd_ringbuffer *ring = ctx->ring;
unsigned dirty = ctx->dirty;
unsigned ce, i;
+ struct fd3_shader_key key = {
+ };
dirty &= FD_DIRTY_VIEWPORT | FD_DIRTY_FRAMEBUFFER | FD_DIRTY_SCISSOR;
dirty |= FD_DIRTY_PROG;
fd3_clear_binning(ctx, dirty);
/* emit generic state now: */
- fd3_emit_state(ctx, ring, &ctx->solid_prog, dirty, false);
+ fd3_emit_state(ctx, ring, &ctx->solid_prog, dirty, key);
OUT_PKT0(ring, REG_A3XX_RB_BLEND_ALPHA, 1);
OUT_RING(ring, A3XX_RB_BLEND_ALPHA_UINT(0xff) |
OUT_PKT0(ring, REG_A3XX_GRAS_SU_MODE_CONTROL, 1);
OUT_RING(ring, A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(0));
- fd3_emit_vertex_bufs(ring, &ctx->solid_prog, (struct fd3_vertex_buf[]) {
- { .prsc = fd3_ctx->solid_vbuf, .stride = 12, .format = PIPE_FORMAT_R32G32B32_FLOAT },
- }, 1);
+ fd3_emit_vertex_bufs(ring, fd3_shader_variant(ctx->solid_prog.vp, key),
+ (struct fd3_vertex_buf[]) {{
+ .prsc = fd3_ctx->solid_vbuf,
+ .stride = 12,
+ .format = PIPE_FORMAT_R32G32B32_FLOAT,
+ }}, 1);
fd_wfi(ctx, ring);
fd3_emit_constant(ring, SB_FRAG_SHADER, 0, 0, 4, color->ui, NULL);
emit_constants(struct fd_ringbuffer *ring,
enum adreno_state_block sb,
struct fd_constbuf_stateobj *constbuf,
- struct fd3_shader_stateobj *shader)
+ struct fd3_shader_variant *shader)
{
uint32_t enabled_mask = constbuf->enabled_mask;
uint32_t base = 0;
void
fd3_emit_vertex_bufs(struct fd_ringbuffer *ring,
- struct fd_program_stateobj *prog,
+ struct fd3_shader_variant *vp,
struct fd3_vertex_buf *vbufs, uint32_t n)
{
- struct fd3_shader_stateobj *vp = prog->vp;
uint32_t i;
n = MIN2(n, vp->inputs_count);
void
fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
- struct fd_program_stateobj *prog, uint32_t dirty, bool binning)
+ struct fd_program_stateobj *prog, uint32_t dirty,
+ struct fd3_shader_key key)
{
+ struct fd3_shader_variant *vp;
+ struct fd3_shader_variant *fp;
+
+ fp = fd3_shader_variant(prog->fp, key);
+ vp = fd3_shader_variant(prog->vp, key);
+
emit_marker(ring, 5);
if (dirty & FD_DIRTY_SAMPLE_MASK) {
struct fd3_zsa_stateobj *zsa = fd3_zsa_stateobj(ctx->zsa);
struct pipe_stencil_ref *sr = &ctx->stencil_ref;
- if (!binning) {
+ if (!key.binning_pass) {
struct fd3_context *fd3_ctx = fd3_context(ctx);
/* I suppose if we needed to (which I don't *think* we need
}
if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_PROG)) {
- struct fd3_shader_stateobj *fp = prog->fp;
uint32_t val = fd3_zsa_stateobj(ctx->zsa)->rb_depth_control;
if (fp->writes_pos) {
val |= A3XX_RB_DEPTH_CONTROL_FRAG_WRITES_Z;
}
if (dirty & (FD_DIRTY_RASTERIZER | FD_DIRTY_PROG)) {
- struct fd3_shader_stateobj *fp = prog->fp;
uint32_t val = fd3_rasterizer_stateobj(ctx->rasterizer)
->gras_cl_clip_cntl;
if (fp->writes_pos) {
if (dirty & (FD_DIRTY_RASTERIZER | FD_DIRTY_PROG)) {
struct fd3_rasterizer_stateobj *rasterizer =
fd3_rasterizer_stateobj(ctx->rasterizer);
- struct fd3_shader_stateobj *fp = prog->fp;
uint32_t stride_in_vpc;
stride_in_vpc = align(fp->total_in, 4) / 4;
if (dirty & FD_DIRTY_PROG) {
fd_wfi(ctx, ring);
- fd3_program_emit(ring, prog, binning);
+ fd3_program_emit(ring, prog, key);
}
OUT_PKT3(ring, CP_EVENT_WRITE, 1);
fd_wfi(ctx, ring);
emit_constants(ring, SB_VERT_SHADER,
&ctx->constbuf[PIPE_SHADER_VERTEX],
- (prog->dirty & FD_SHADER_DIRTY_VP) ? prog->vp : NULL);
+ (prog->dirty & FD_SHADER_DIRTY_VP) ? vp : NULL);
emit_constants(ring, SB_FRAG_SHADER,
&ctx->constbuf[PIPE_SHADER_FRAGMENT],
- (prog->dirty & FD_SHADER_DIRTY_FP) ? prog->fp : NULL);
+ (prog->dirty & FD_SHADER_DIRTY_FP) ? fp : NULL);
}
if ((dirty & FD_DIRTY_BLEND) && ctx->blend) {
};
void fd3_emit_vertex_bufs(struct fd_ringbuffer *ring,
- struct fd_program_stateobj *prog,
+ struct fd3_shader_variant *vp,
struct fd3_vertex_buf *vbufs, uint32_t n);
void fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
- struct fd_program_stateobj *prog, uint32_t dirty, bool binning);
+ struct fd_program_stateobj *prog, uint32_t dirty,
+ struct fd3_shader_key key);
void fd3_emit_restore(struct fd_context *ctx);
#endif /* FD3_EMIT_H */
#include "fd3_util.h"
#include "fd3_zsa.h"
+static const struct fd3_shader_key key = {
+};
static void
emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs,
A3XX_GRAS_SC_CONTROL_RASTER_MODE(1));
fd_wfi(ctx, ring);
- fd3_program_emit(ring, &ctx->solid_prog, false);
-
- fd3_emit_vertex_bufs(ring, &ctx->solid_prog, (struct fd3_vertex_buf[]) {
- { .prsc = fd3_ctx->solid_vbuf, .stride = 12, .format = PIPE_FORMAT_R32G32B32_FLOAT },
- }, 1);
+ fd3_program_emit(ring, &ctx->solid_prog, key);
+ fd3_emit_vertex_bufs(ring, fd3_shader_variant(ctx->solid_prog.vp, key),
+ (struct fd3_vertex_buf[]) {{
+ .prsc = fd3_ctx->solid_vbuf,
+ .stride = 12,
+ .format = PIPE_FORMAT_R32G32B32_FLOAT,
+ }}, 1);
OUT_PKT0(ring, REG_A3XX_HLSQ_CONTROL_0_REG, 4);
OUT_RING(ring, A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(FOUR_QUADS) |
OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */
fd_wfi(ctx, ring);
- fd3_program_emit(ring, &ctx->solid_prog, false);
-
- fd3_emit_vertex_bufs(ring, &ctx->solid_prog, (struct fd3_vertex_buf[]) {
- { .prsc = fd3_ctx->solid_vbuf, .stride = 12, .format = PIPE_FORMAT_R32G32B32_FLOAT },
- }, 1);
+ fd3_program_emit(ring, &ctx->solid_prog, key);
+ fd3_emit_vertex_bufs(ring, fd3_shader_variant(ctx->solid_prog.vp, key),
+ (struct fd3_vertex_buf[]) {{
+ .prsc = fd3_ctx->solid_vbuf,
+ .stride = 12,
+ .format = PIPE_FORMAT_R32G32B32_FLOAT,
+ }}, 1);
if (ctx->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
uint32_t base = 0;
OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */
fd_wfi(ctx, ring);
- fd3_program_emit(ring, &ctx->blit_prog, false);
-
- fd3_emit_vertex_bufs(ring, &ctx->blit_prog, (struct fd3_vertex_buf[]) {
- { .prsc = fd3_ctx->blit_texcoord_vbuf, .stride = 8, .format = PIPE_FORMAT_R32G32_FLOAT },
- { .prsc = fd3_ctx->solid_vbuf, .stride = 12, .format = PIPE_FORMAT_R32G32B32_FLOAT },
- }, 2);
+ fd3_program_emit(ring, &ctx->blit_prog, key);
+ fd3_emit_vertex_bufs(ring, fd3_shader_variant(ctx->blit_prog.vp, key),
+ (struct fd3_vertex_buf[]) {{
+ .prsc = fd3_ctx->blit_texcoord_vbuf,
+ .stride = 8,
+ .format = PIPE_FORMAT_R32G32_FLOAT,
+ }, {
+ .prsc = fd3_ctx->solid_vbuf,
+ .stride = 12,
+ .format = PIPE_FORMAT_R32G32B32_FLOAT,
+ }}, 2);
/* for gmem pitch/base calculations, we need to use the non-
* truncated tile sizes:
#include "fd3_util.h"
static void
-delete_shader(struct fd3_shader_stateobj *so)
+delete_variant(struct fd3_shader_variant *v)
{
- ir3_shader_destroy(so->ir);
- fd_bo_del(so->bo);
- free(so);
+ ir3_shader_destroy(v->ir);
+ fd_bo_del(v->bo);
+ free(v);
}
static void
-assemble_shader(struct pipe_context *pctx, struct fd3_shader_stateobj *so)
+assemble_variant(struct fd3_shader_variant *so)
{
- struct fd_context *ctx = fd_context(pctx);
+ struct fd_context *ctx = fd_context(so->so->pctx);
uint32_t sz, *bin;
bin = ir3_shader_assemble(so->ir, &so->info);
* reflect the # of registers actually used:
*/
static void
-fixup_vp_regfootprint(struct fd3_shader_stateobj *so)
+fixup_vp_regfootprint(struct fd3_shader_variant *so)
{
unsigned i;
for (i = 0; i < so->inputs_count; i++)
so->info.max_reg = MAX2(so->info.max_reg, (so->outputs[i].regid + 3) >> 2);
}
-static struct fd3_shader_stateobj *
-create_shader(struct pipe_context *pctx, const struct pipe_shader_state *cso,
- enum shader_t type)
+static struct fd3_shader_variant *
+create_variant(struct fd3_shader_stateobj *so, struct fd3_shader_key key)
{
- struct fd3_shader_stateobj *so = CALLOC_STRUCT(fd3_shader_stateobj);
- const struct tgsi_token *tokens = cso->tokens;
+ struct fd3_shader_variant *v = CALLOC_STRUCT(fd3_shader_variant);
+ const struct tgsi_token *tokens = so->tokens;
int ret;
- if (!so)
+ if (!v)
return NULL;
- so->type = type;
+ v->so = so;
+ v->key = key;
+ v->type = so->type;
if (fd_mesa_debug & FD_DBG_DISASM) {
DBG("dump tgsi: type=%d", so->type);
tgsi_dump(tokens, 0);
}
- if ((type == SHADER_FRAGMENT) && (fd_mesa_debug & FD_DBG_FRAGHALF))
- so->half_precision = true;
-
-
if (!(fd_mesa_debug & FD_DBG_NOOPT)) {
- ret = fd3_compile_shader(so, tokens);
+ ret = fd3_compile_shader(v, tokens, key);
if (ret) {
debug_error("new compiler failed, trying fallback!");
- so->inputs_count = 0;
- so->outputs_count = 0;
- so->total_in = 0;
- so->samplers_count = 0;
- so->immediates_count = 0;
+ v->inputs_count = 0;
+ v->outputs_count = 0;
+ v->total_in = 0;
+ v->samplers_count = 0;
+ v->immediates_count = 0;
}
} else {
ret = -1; /* force fallback to old compiler */
}
if (ret)
- ret = fd3_compile_shader_old(so, tokens);
+ ret = fd3_compile_shader_old(v, tokens, key);
if (ret) {
debug_error("compile failed!");
goto fail;
}
- assemble_shader(pctx, so);
- if (!so->bo) {
+ assemble_variant(v);
+ if (!v->bo) {
debug_error("assemble failed!");
goto fail;
}
- if (type == SHADER_VERTEX)
- fixup_vp_regfootprint(so);
+ if (so->type == SHADER_VERTEX)
+ fixup_vp_regfootprint(v);
if (fd_mesa_debug & FD_DBG_DISASM) {
- DBG("disassemble: type=%d", so->type);
- disasm_a3xx(fd_bo_map(so->bo), so->info.sizedwords, 0, so->type);
+ DBG("disassemble: type=%d", v->type);
+ disasm_a3xx(fd_bo_map(v->bo), v->info.sizedwords, 0, v->type);
}
- return so;
+ return v;
fail:
- delete_shader(so);
+ delete_variant(v);
return NULL;
}
+/* Look up (or lazily compile) the shader variant matching 'key'.
+ *
+ * Returns NULL if a new variant was needed but compilation/assembly
+ * failed (see create_variant()).
+ */
+struct fd3_shader_variant *
+fd3_shader_variant(struct fd3_shader_stateobj *so, struct fd3_shader_key key)
+{
+	struct fd3_shader_variant *v;
+
+	/* some shader key values only apply to vertex or frag shader,
+	 * so normalize the key to avoid constructing multiple identical
+	 * variants:
+	 */
+	if (so->type == SHADER_FRAGMENT) {
+		key.binning_pass = false;
+	}
+	if (so->type == SHADER_VERTEX) {
+		key.color_two_side = false;
+		key.half_precision = false;
+	}
+
+	/* NOTE: keys are compared with memcmp(), which relies on callers
+	 * zero-initializing the whole struct (designated initializers do)
+	 * so that padding bits match:
+	 */
+	for (v = so->variants; v; v = v->next)
+		if (!memcmp(&key, &v->key, sizeof(key)))
+			return v;
+
+	/* compile new variant if it doesn't exist already: */
+	v = create_variant(so, key);
+	if (v) {
+		/* only link a successfully built variant into the list;
+		 * create_variant() returns NULL on failure and dereferencing
+		 * it here would crash:
+		 */
+		v->next = so->variants;
+		so->variants = v;
+	}
+
+	return v;
+}
+
+
+/* Destroy a shader state object: frees every compiled variant in the
+ * linked list plus the duplicated TGSI token stream owned by the
+ * stateobj (see create_shader()).
+ */
+static void
+delete_shader(struct fd3_shader_stateobj *so)
+{
+	struct fd3_shader_variant *v, *t;
+	/* save 'next' before delete_variant() frees the node: */
+	for (v = so->variants; v; ) {
+		t = v;
+		v = v->next;
+		delete_variant(t);
+	}
+	/* cast away const: tokens were heap-duplicated at create time */
+	free((void *)so->tokens);
+	free(so);
+}
+
+/* Allocate a new shader state object, taking a private copy of the
+ * TGSI tokens (caller's cso->tokens may not outlive us).  Variants
+ * are compiled lazily in fd3_shader_variant().
+ *
+ * Returns NULL on allocation failure.
+ */
+static struct fd3_shader_stateobj *
+create_shader(struct pipe_context *pctx, const struct pipe_shader_state *cso,
+		enum shader_t type)
+{
+	struct fd3_shader_stateobj *so = CALLOC_STRUCT(fd3_shader_stateobj);
+	if (!so)
+		return NULL;
+	so->pctx = pctx;
+	so->type = type;
+	so->tokens = tgsi_dup_tokens(cso->tokens);
+	if (!so->tokens) {
+		free(so);
+		return NULL;
+	}
+	return so;
+}
+
static void *
fd3_fp_state_create(struct pipe_context *pctx,
const struct pipe_shader_state *cso)
}
static void
-emit_shader(struct fd_ringbuffer *ring, const struct fd3_shader_stateobj *so)
+emit_shader(struct fd_ringbuffer *ring, const struct fd3_shader_variant *so)
{
const struct ir3_shader_info *si = &so->info;
enum adreno_state_block sb;
}
static int
-find_output(const struct fd3_shader_stateobj *so, fd3_semantic semantic)
+find_output(const struct fd3_shader_variant *so, fd3_semantic semantic)
{
int j;
for (j = 0; j < so->outputs_count; j++)
}
static uint32_t
-find_output_regid(const struct fd3_shader_stateobj *so, fd3_semantic semantic)
+find_output_regid(const struct fd3_shader_variant *so, fd3_semantic semantic)
{
int j;
for (j = 0; j < so->outputs_count; j++)
void
fd3_program_emit(struct fd_ringbuffer *ring,
- struct fd_program_stateobj *prog, bool binning)
+ struct fd_program_stateobj *prog, struct fd3_shader_key key)
{
- const struct fd3_shader_stateobj *vp = prog->vp;
- const struct fd3_shader_stateobj *fp = prog->fp;
- const struct ir3_shader_info *vsi = &vp->info;
- const struct ir3_shader_info *fsi = &fp->info;
+ const struct fd3_shader_variant *vp, *fp;
+ const struct ir3_shader_info *vsi, *fsi;
uint32_t pos_regid, posz_regid, psize_regid, color_regid;
int i;
- if (binning) {
+ vp = fd3_shader_variant(prog->vp, key);
+
+ if (key.binning_pass) {
/* use dummy stateobj to simplify binning vs non-binning: */
- static const struct fd3_shader_stateobj binning_fp = {};
+ static const struct fd3_shader_variant binning_fp = {};
fp = &binning_fp;
- fsi = &fp->info;
+ } else {
+ fp = fd3_shader_variant(prog->fp, key);
}
+ vsi = &vp->info;
+ fsi = &fp->info;
+
pos_regid = find_output_regid(vp,
fd3_semantic_name(TGSI_SEMANTIC_POSITION, 0));
posz_regid = find_output_regid(fp,
OUT_PKT0(ring, REG_A3XX_SP_SP_CTRL_REG, 1);
OUT_RING(ring, A3XX_SP_SP_CTRL_REG_CONSTMODE(0) |
- COND(binning, A3XX_SP_SP_CTRL_REG_BINNING) |
+ COND(key.binning_pass, A3XX_SP_SP_CTRL_REG_BINNING) |
A3XX_SP_SP_CTRL_REG_SLEEPMODE(1) |
A3XX_SP_SP_CTRL_REG_L0MODE(0));
A3XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET(0));
OUT_RELOC(ring, vp->bo, 0, 0, 0); /* SP_VS_OBJ_START_REG */
- if (binning) {
+ if (key.binning_pass) {
OUT_PKT0(ring, REG_A3XX_SP_FS_LENGTH_REG, 1);
OUT_RING(ring, 0x00000000);
OUT_PKT0(ring, REG_A3XX_SP_FS_MRT_REG(0), 4);
OUT_RING(ring, A3XX_SP_FS_MRT_REG_REGID(color_regid) |
- COND(fp->half_precision, A3XX_SP_FS_MRT_REG_HALF_PRECISION));
+ COND(fp->key.half_precision, A3XX_SP_FS_MRT_REG_HALF_PRECISION));
OUT_RING(ring, A3XX_SP_FS_MRT_REG_REGID(0));
OUT_RING(ring, A3XX_SP_FS_MRT_REG_REGID(0));
OUT_RING(ring, A3XX_SP_FS_MRT_REG_REGID(0));
- if (binning) {
+ if (key.binning_pass) {
OUT_PKT0(ring, REG_A3XX_VPC_ATTR, 2);
OUT_RING(ring, A3XX_VPC_ATTR_THRDASSIGN(1) |
A3XX_VPC_ATTR_LMSIZE(1));
A3XX_VPC_PACK_NUMNONPOSVSVAR(fp->total_in));
OUT_PKT0(ring, REG_A3XX_VPC_VARYING_INTERP_MODE(0), 4);
- OUT_RING(ring, fp->vinterp[0]); /* VPC_VARYING_INTERP[0].MODE */
- OUT_RING(ring, fp->vinterp[1]); /* VPC_VARYING_INTERP[1].MODE */
- OUT_RING(ring, fp->vinterp[2]); /* VPC_VARYING_INTERP[2].MODE */
- OUT_RING(ring, fp->vinterp[3]); /* VPC_VARYING_INTERP[3].MODE */
+ OUT_RING(ring, fp->so->vinterp[0]); /* VPC_VARYING_INTERP[0].MODE */
+ OUT_RING(ring, fp->so->vinterp[1]); /* VPC_VARYING_INTERP[1].MODE */
+ OUT_RING(ring, fp->so->vinterp[2]); /* VPC_VARYING_INTERP[2].MODE */
+ OUT_RING(ring, fp->so->vinterp[3]); /* VPC_VARYING_INTERP[3].MODE */
OUT_PKT0(ring, REG_A3XX_VPC_VARYING_PS_REPL_MODE(0), 4);
- OUT_RING(ring, fp->vpsrepl[0]); /* VPC_VARYING_PS_REPL[0].MODE */
- OUT_RING(ring, fp->vpsrepl[1]); /* VPC_VARYING_PS_REPL[1].MODE */
- OUT_RING(ring, fp->vpsrepl[2]); /* VPC_VARYING_PS_REPL[2].MODE */
- OUT_RING(ring, fp->vpsrepl[3]); /* VPC_VARYING_PS_REPL[3].MODE */
+ OUT_RING(ring, fp->so->vpsrepl[0]); /* VPC_VARYING_PS_REPL[0].MODE */
+ OUT_RING(ring, fp->so->vpsrepl[1]); /* VPC_VARYING_PS_REPL[1].MODE */
+ OUT_RING(ring, fp->so->vpsrepl[2]); /* VPC_VARYING_PS_REPL[2].MODE */
+ OUT_RING(ring, fp->so->vpsrepl[3]); /* VPC_VARYING_PS_REPL[3].MODE */
}
OUT_PKT0(ring, REG_A3XX_VFD_VS_THREADING_THRESHOLD, 1);
OUT_PKT0(ring, REG_A3XX_VFD_PERFCOUNTER0_SELECT, 1);
OUT_RING(ring, 0x00000000); /* VFD_PERFCOUNTER0_SELECT */
- if (!binning) {
+ if (!key.binning_pass) {
emit_shader(ring, fp);
OUT_PKT0(ring, REG_A3XX_VFD_PERFCOUNTER0_SELECT, 1);
#include "pipe/p_context.h"
#include "freedreno_context.h"
-
+#include "fd3_util.h"
#include "ir3.h"
#include "disasm.h"
return sem & 0xff;
}
-struct fd3_shader_stateobj {
- enum shader_t type;
-
+struct fd3_shader_variant {
struct fd_bo *bo;
+ struct fd3_shader_key key;
+
struct ir3_shader_info info;
struct ir3_shader *ir;
- /* is shader using (or more precisely, is color_regid) half-
- * precision register?
- */
- bool half_precision;
-
/* the instructions length is in units of instruction groups
* (4 instructions, 8 dwords):
*/
uint32_t val[4];
} immediates[64];
+	/* shader variants form a linked list: */
+ struct fd3_shader_variant *next;
+
+ /* replicated here to avoid passing extra ptrs everywhere: */
+ enum shader_t type;
+ struct fd3_shader_stateobj *so;
+};
+
+struct fd3_shader_stateobj {
+ enum shader_t type;
+
+ struct pipe_context *pctx;
+ const struct tgsi_token *tokens;
+
+ struct fd3_shader_variant *variants;
+
/* so far, only used for blit_prog shader.. values for
* VPC_VARYING_INTERP[i].MODE and VPC_VARYING_PS_REPL[i].MODE
+ *
+ * Possibly should be in fd3_program_variant?
*/
uint32_t vinterp[4], vpsrepl[4];
};
+struct fd3_shader_variant * fd3_shader_variant(struct fd3_shader_stateobj *so,
+ struct fd3_shader_key key);
+
void fd3_program_emit(struct fd_ringbuffer *ring,
- struct fd_program_stateobj *prog, bool binning);
+ struct fd_program_stateobj *prog, struct fd3_shader_key key);
void fd3_prog_init(struct pipe_context *pctx);
uint32_t fd3_tex_swiz(enum pipe_format format, unsigned swizzle_r,
unsigned swizzle_g, unsigned swizzle_b, unsigned swizzle_a);
+/* Configuration key used to identify a shader variant.. different
+ * shader variants can be used to implement features not supported
+ * in hw (two sided color), binning-pass vertex shader, etc.
+ *
+ * NOTE: this is declared here (rather than fd3_program.h) as it is
+ * passed around through a lot of the emit code in various parts
+ * which would otherwise not necessarily need to incl fd3_program.h
+ *
+ * NOTE: variant lookup compares keys with memcmp() (see
+ * fd3_shader_variant()), so keys must always be fully
+ * zero-initialized; any new field added here must default to 0
+ * for the "no special handling" case.
+ */
+struct fd3_shader_key {
+	/* vertex shader variant parameters: */
+	unsigned binning_pass : 1;
+
+	/* fragment shader variant parameters: */
+	unsigned color_two_side : 1;
+	unsigned half_precision : 1;
+};
+struct fd3_shader_variant;
+
#endif /* FD3_UTIL_H_ */