#include "instr-a3xx.h"
#include "ir3.h"
+/* NOTE on half/full precision:
+ * Currently, the front end (ie. basically this file) does everything in
+ * full precision (with the exception of trans_arl(), which doesn't work
+ * currently: we reject anything with relative addressing and fall back
+ * to the old compiler).
+ *
+ * In the RA step, if half_precision, it will assign the output to hr0.x
+ * but use full precision everywhere else.
+ *
+ * Eventually we'll need a better way to communicate type information
+ * to RA so that it can more properly assign both half and full precision
+ * registers. (And presumably double precision pairs for a4xx?) This
+ * would let us make more use of half precision registers, while still
+ * keeping things like tex coords in full precision registers.
+ *
+ * Since the RA is dealing with patching instruction types for half
+ * precision output, we can ignore that in the front end and just always
+ * create full precision instructions.
+ */
struct fd3_compile_context {
const struct tgsi_token *tokens;
ir3_dump_instr_list(ctx.block->head);
}
- ret = ir3_block_ra(ctx.block, so->type);
+ ret = ir3_block_ra(ctx.block, so->type, key.half_precision);
if (ret)
goto out;
/* do binning pass first: */
.binning_pass = true,
.color_two_side = ctx->rasterizer ? ctx->rasterizer->light_twoside : false,
+ // TODO set .half_precision based on render target format,
+ // ie. float16 and smaller use half, float32 use full..
+ .half_precision = !!(fd_mesa_debug & FD_DBG_FRAGHALF),
};
draw_impl(ctx, info, ctx->binning_ring,
dirty & ~(FD_DIRTY_BLEND), key);
struct fd_ringbuffer *ring = ctx->binning_ring;
struct fd3_shader_key key = {
.binning_pass = true,
+ .half_precision = true,
};
fd3_emit_state(ctx, ring, &ctx->solid_prog, dirty, key);
unsigned dirty = ctx->dirty;
unsigned ce, i;
struct fd3_shader_key key = {
+ .half_precision = true,
};
dirty &= FD_DIRTY_VIEWPORT | FD_DIRTY_FRAMEBUFFER | FD_DIRTY_SCISSOR;
v->type = so->type;
if (fd_mesa_debug & FD_DBG_DISASM) {
- DBG("dump tgsi: type=%d", so->type);
+ DBG("dump tgsi: type=%d, k={bp=%u,cts=%u,hp=%u}", so->type,
+ key.binning_pass, key.color_two_side, key.half_precision);
tgsi_dump(tokens, 0);
}
fixup_vp_regfootprint(v);
if (fd_mesa_debug & FD_DBG_DISASM) {
- DBG("disassemble: type=%d", v->type);
+ DBG("disassemble: type=%d, k={bp=%u,cts=%u,hp=%u}", v->type,
+ key.binning_pass, key.color_two_side, key.half_precision);
disasm_a3xx(fd_bo_map(v->bo), v->info.sizedwords, 0, v->type);
}
struct ir3_ra_ctx {
struct ir3_block *block;
enum shader_t type;
+ bool half_precision; /* if set, block outputs are assigned to half precision registers */
int cnt;
bool error;
};
+/* sorta ugly way to retrofit half-precision support.. rather than
+ * passing extra param around, just OR in a high bit. All the low
+ * value arithmetic (ie. +/- offset within a contiguous vec4, etc)
+ * will continue to work as long as you don't underflow (and that
+ * would go badly anyways).
+ *
+ * ra_assign_reg() masks the bit back off before storing the register
+ * number, and sets IR3_REG_HALF on the register in its place.
+ */
+#define REG_HALF 0x8000
+
struct ir3_ra_assignment {
int8_t off; /* offset of instruction dst within range */
uint8_t num; /* number of components for the range */
* see how because the blob driver always uses r0.x (ie.
* all zeros)
*/
- if (ctx->type == SHADER_FRAGMENT)
+ if ((ctx->type == SHADER_FRAGMENT) && !ctx->half_precision)
return 2;
return 0;
}
return (struct ra_assign_visitor *)v;
}
+static type_t half_type(type_t type)
+{
+ switch (type) { /* 32-bit type -> 16-bit type of the same kind */
+ case TYPE_F32: return TYPE_F16;
+ case TYPE_U32: return TYPE_U16;
+ case TYPE_S32: return TYPE_S16;
+ /* instructions may already be fixed up, in which case the type is
+ * already a 16-bit one and is returned unchanged:
+ */
+ case TYPE_F16:
+ case TYPE_U16:
+ case TYPE_S16:
+ return type;
+ default:
+ assert(0); /* no half precision mapping for any other type */
+ return ~0; /* only reached when assert() is compiled out */
+ }
+}
+
+/* some instructions need fix-up if dst register is half precision:
+ *
+ * - category 1 (moves): cat1.dst_type is converted with half_type()
+ * - category 3: the opcode is swapped for its 16-bit variant; opcodes
+ *   not listed in the switch below hit assert(0)
+ * - category 5: cat5.type is converted with half_type()
+ *
+ * Instructions in any other category are left untouched.
+ */
+static void fixup_half_instr_dst(struct ir3_instruction *instr)
+{
+ switch (instr->category) {
+ case 1: /* move instructions */
+ instr->cat1.dst_type = half_type(instr->cat1.dst_type);
+ break;
+ case 3:
+ switch (instr->opc) {
+ case OPC_MAD_F32:
+ instr->opc = OPC_MAD_F16;
+ break;
+ case OPC_SEL_B32:
+ instr->opc = OPC_SEL_B16;
+ break;
+ case OPC_SEL_S32:
+ instr->opc = OPC_SEL_S16;
+ break;
+ case OPC_SEL_F32:
+ instr->opc = OPC_SEL_F16;
+ break;
+ case OPC_SAD_S32:
+ instr->opc = OPC_SAD_S16;
+ break;
+ /* instructions may already be fixed up: */
+ case OPC_MAD_F16:
+ case OPC_SEL_B16:
+ case OPC_SEL_S16:
+ case OPC_SEL_F16:
+ case OPC_SAD_S16:
+ break;
+ default:
+ assert(0);
+ break;
+ }
+ break;
+ case 5:
+ instr->cat5.type = half_type(instr->cat5.type);
+ break;
+ }
+}
+/* some instructions need fix-up if src register is half precision:
+ * only category 1 (move) instructions are touched, by converting
+ * cat1.src_type with half_type(); every other category is left as is.
+ */
+static void fixup_half_instr_src(struct ir3_instruction *instr)
+{
+ switch (instr->category) {
+ case 1: /* move instructions */
+ instr->cat1.src_type = half_type(instr->cat1.src_type);
+ break;
+ }
+}
+
static void ra_assign_reg(struct ir3_visitor *v,
struct ir3_instruction *instr, struct ir3_register *reg)
{
struct ra_assign_visitor *a = ra_assign_visitor(v);
reg->flags &= ~IR3_REG_SSA;
- reg->num = a->num;
+ reg->num = a->num & ~REG_HALF; /* REG_HALF is a marker bit, not part of the register number */
+ if (a->num & REG_HALF) {
+ reg->flags |= IR3_REG_HALF;
+ /* if dst reg being assigned, patch up the instr; regs[0] is
+ * treated as the dst here, any other register as a src:
+ */
+ if (reg == instr->regs[0])
+ fixup_half_instr_dst(instr);
+ else
+ fixup_half_instr_src(instr);
+ }
}
static void ra_assign_dst_shader_input(struct ir3_visitor *v,
/* if we've already visited this instruction, bail now: */
if (ir3_instr_check_mark(assigner)) {
- debug_assert(assigner->regs[0]->num == num);
- if (assigner->regs[0]->num != num) {
+ debug_assert(assigner->regs[0]->num == (num & ~REG_HALF));
+ if (assigner->regs[0]->num != (num & ~REG_HALF)) {
/* impossible situation, should have been resolved
* at an earlier stage by inserting extra mov's:
*/
base = alloc_block(ctx, NULL, block->noutputs + off);
+ if (ctx->half_precision)
+ base |= REG_HALF;
+
for (i = 0; i < block->noutputs; i++)
if (block->outputs[i])
ra_assign(ctx, block->outputs[i], base + i + off);
if (ctx->type == SHADER_FRAGMENT) {
for (i = 0; i < block->ninputs; i++)
if (block->inputs[i])
- ra_assign(ctx, block->inputs[i], base + i);
+ ra_assign(ctx, block->inputs[i], (base & ~REG_HALF) + i);
} else {
for (i = 0; i < block->ninputs; i++)
if (block->inputs[i])
return 0;
}
-int ir3_block_ra(struct ir3_block *block, enum shader_t type)
+int ir3_block_ra(struct ir3_block *block, enum shader_t type,
+ bool half_precision)
{
struct ir3_ra_ctx ctx = {
.block = block,
.type = type,
+ .half_precision = half_precision,
};
ir3_shader_clear_mark(block->shader);
return block_ra(&ctx, block);