From 5a5a43078c502841b332c92281201e758dcbae2d Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Fri, 2 Mar 2018 10:21:55 -0500 Subject: [PATCH] freedreno/ir3: start dealing with half-precision Some instructions, assume src and/or dst is half-precision based on a type field (ie. f32/s32/u32 are full precision but others are half precision). So add some code to sanity check the src/dst registers to catch mixups. Also propagate half-precision flag for SSA sources. The instruction consuming a SSA value needs to be of the same type as the one producing it. This is probably not complete half-precision support, but a useful first step. We do still need to add support for nir alu instructions for converting between half/full precision. Signed-off-by: Rob Clark --- src/gallium/drivers/freedreno/ir3/ir3.c | 55 +++++++++++++++---- src/gallium/drivers/freedreno/ir3/ir3.h | 48 +++++++++------- .../drivers/freedreno/ir3/ir3_compiler_nir.c | 8 +++ 3 files changed, 81 insertions(+), 30 deletions(-) diff --git a/src/gallium/drivers/freedreno/ir3/ir3.c b/src/gallium/drivers/freedreno/ir3/ir3.c index dd5fb2fbbe5..d1a73ddc727 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3.c +++ b/src/gallium/drivers/freedreno/ir3/ir3.c @@ -68,10 +68,17 @@ void ir3_destroy(struct ir3 *shader) #define iassert(cond) do { \ if (!(cond)) { \ - assert(cond); \ + debug_assert(cond); \ return -1; \ } } while (0) +#define iassert_type(reg, full) do { \ + if ((full)) { \ + iassert(!((reg)->flags & IR3_REG_HALF)); \ + } else { \ + iassert((reg)->flags & IR3_REG_HALF); \ + } } while (0); + static uint32_t reg(struct ir3_register *reg, struct ir3_info *info, uint32_t repeat, uint32_t valid_flags) { @@ -142,11 +149,6 @@ static int emit_cat0(struct ir3_instruction *instr, void *ptr, return 0; } -static uint32_t type_flags(type_t type) -{ - return (type_size(type) == 32) ? 0 : IR3_REG_HALF; -} - static int emit_cat1(struct ir3_instruction *instr, void *ptr, struct ir3_info *info) { @@ -155,9 +157,9 @@ static int emit_cat1(struct ir3_instruction *instr, void *ptr, instr_cat1_t *cat1 = ptr; iassert(instr->regs_count == 2); - iassert(!((dst->flags ^ type_flags(instr->cat1.dst_type)) & IR3_REG_HALF)); - iassert((src->flags & IR3_REG_IMMED) || - !((src->flags ^ type_flags(instr->cat1.src_type)) & IR3_REG_HALF)); + iassert_type(dst, type_size(instr->cat1.dst_type) == 32); + if (!(src->flags & IR3_REG_IMMED)) + iassert_type(src, type_size(instr->cat1.src_type) == 32); if (src->flags & IR3_REG_IMMED) { cat1->iim_val = src->iim_val; @@ -425,7 +427,7 @@ static int emit_cat5(struct ir3_instruction *instr, void *ptr, struct ir3_register *src3 = instr->regs[3]; instr_cat5_t *cat5 = ptr; - iassert(!((dst->flags ^ type_flags(instr->cat5.type)) & IR3_REG_HALF)); + iassert_type(dst, type_size(instr->cat5.type) == 32) assume(src1 || !src2); assume(src2 || !src3); @@ -477,6 +479,7 @@ static int emit_cat6(struct ir3_instruction *instr, void *ptr, { struct ir3_register *dst, *src1, *src2; instr_cat6_t *cat6 = ptr; + bool type_full = type_size(instr->cat6.type) == 32; cat6->type = instr->cat6.type; cat6->opc = instr->opc; @@ -485,6 +488,36 @@ static int emit_cat6(struct ir3_instruction *instr, void *ptr, cat6->g = !!(instr->flags & IR3_INSTR_G); cat6->opc_cat = 6; + switch (instr->opc) { + case OPC_RESINFO: + case OPC_RESFMT: + iassert_type(instr->regs[0], type_full); /* dst */ + iassert_type(instr->regs[1], type_full); /* src1 */ + break; + case OPC_L2G: + case OPC_G2L: + iassert_type(instr->regs[0], true); /* dst */ + iassert_type(instr->regs[1], true); /* src1 */ + break; + case OPC_STG: + case OPC_STL: + case OPC_STP: + case OPC_STI: + case OPC_STLW: + case OPC_STIB: + /* no dst, so regs[0] is dummy */ + iassert_type(instr->regs[1], true); /* dst */ + iassert_type(instr->regs[2], type_full); /* src1 */ + iassert_type(instr->regs[3], true); /* src2 */ + break; + default: + iassert_type(instr->regs[0], type_full); /* dst */ + iassert_type(instr->regs[1], true); /* src1 */ + if (instr->regs_count > 2) + iassert_type(instr->regs[2], true); /* src1 */ + break; + } + /* the "dst" for a store instruction is (from the perspective * of data flow in the shader, ie. register use/def, etc) in * fact a register that is read by the instruction, rather @@ -628,7 +661,7 @@ static int emit_cat6(struct ir3_instruction *instr, void *ptr, cat6->src_off = false; - cat6b->src1 = reg(src1, info, instr->repeat, IR3_REG_IMMED); + cat6b->src1 = reg(src1, info, instr->repeat, IR3_REG_IMMED | IR3_REG_HALF); cat6b->src1_im = !!(src1->flags & IR3_REG_IMMED); if (src2) { cat6b->src2 = reg(src2, info, instr->repeat, IR3_REG_IMMED); diff --git a/src/gallium/drivers/freedreno/ir3/ir3.h b/src/gallium/drivers/freedreno/ir3/ir3.h index dd13e323800..eaac2b7dfac 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3.h +++ b/src/gallium/drivers/freedreno/ir3/ir3.h @@ -1015,18 +1015,28 @@ void ir3_legalize(struct ir3 *ir, bool *has_samp, bool *has_ssbo, int *max_bary) /* ************************************************************************* */ /* instruction helpers */ +/* creates SSA src of correct type (ie. half vs full precision) */ +static inline struct ir3_register * __ssa_src(struct ir3_instruction *instr, + struct ir3_instruction *src, unsigned flags) +{ + struct ir3_register *reg; + if (src->regs[0]->flags & IR3_REG_HALF) + flags |= IR3_REG_HALF; + reg = ir3_reg_create(instr, 0, IR3_REG_SSA | flags); + reg->instr = src; + return reg; +} + static inline struct ir3_instruction * ir3_MOV(struct ir3_block *block, struct ir3_instruction *src, type_t type) { struct ir3_instruction *instr = ir3_instr_create(block, OPC_MOV); ir3_reg_create(instr, 0, 0); /* dst */ if (src->regs[0]->flags & IR3_REG_ARRAY) { - struct ir3_register *src_reg = - ir3_reg_create(instr, 0, IR3_REG_ARRAY); + struct ir3_register *src_reg = __ssa_src(instr, src, IR3_REG_ARRAY); src_reg->array = src->regs[0]->array; - src_reg->instr = src; } else { - ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = src; + __ssa_src(instr, src, 0); } debug_assert(!(src->regs[0]->flags & IR3_REG_RELATIV)); instr->cat1.src_type = type; @@ -1040,7 +1050,7 @@ ir3_COV(struct ir3_block *block, struct ir3_instruction *src, { struct ir3_instruction *instr = ir3_instr_create(block, OPC_MOV); ir3_reg_create(instr, 0, 0); /* dst */ - ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = src; + __ssa_src(instr, src, 0); instr->cat1.src_type = src_type; instr->cat1.dst_type = dst_type; debug_assert(!(src->regs[0]->flags & IR3_REG_ARRAY)); @@ -1070,7 +1080,7 @@ ir3_##name(struct ir3_block *block, \ struct ir3_instruction *instr = \ ir3_instr_create(block, OPC_##name); \ ir3_reg_create(instr, 0, 0); /* dst */ \ - ir3_reg_create(instr, 0, IR3_REG_SSA | aflags)->instr = a; \ + __ssa_src(instr, a, aflags); \ return instr; \ } @@ -1083,8 +1093,8 @@ ir3_##name(struct ir3_block *block, \ struct ir3_instruction *instr = \ ir3_instr_create(block, OPC_##name); \ ir3_reg_create(instr, 0, 0); /* dst */ \ - ir3_reg_create(instr, 0, IR3_REG_SSA | aflags)->instr = a; \ - ir3_reg_create(instr, 0, IR3_REG_SSA | bflags)->instr = b; \ + __ssa_src(instr, a, aflags); \ + __ssa_src(instr, b, bflags); \ return instr; \ } @@ -1098,9 +1108,9 @@ ir3_##name(struct ir3_block *block, \ struct ir3_instruction *instr = \ ir3_instr_create(block, OPC_##name); \ ir3_reg_create(instr, 0, 0); /* dst */ \ - ir3_reg_create(instr, 0, IR3_REG_SSA | aflags)->instr = a; \ - ir3_reg_create(instr, 0, IR3_REG_SSA | bflags)->instr = b; \ - ir3_reg_create(instr, 0, IR3_REG_SSA | cflags)->instr = c; \ + __ssa_src(instr, a, aflags); \ + __ssa_src(instr, b, bflags); \ + __ssa_src(instr, c, cflags); \ return instr; \ } @@ -1115,10 +1125,10 @@ ir3_##name(struct ir3_block *block, \ struct ir3_instruction *instr = \ ir3_instr_create2(block, OPC_##name, 5); \ ir3_reg_create(instr, 0, 0); /* dst */ \ - ir3_reg_create(instr, 0, IR3_REG_SSA | aflags)->instr = a; \ - ir3_reg_create(instr, 0, IR3_REG_SSA | bflags)->instr = b; \ - ir3_reg_create(instr, 0, IR3_REG_SSA | cflags)->instr = c; \ - ir3_reg_create(instr, 0, IR3_REG_SSA | dflags)->instr = d; \ + __ssa_src(instr, a, aflags); \ + __ssa_src(instr, b, bflags); \ + __ssa_src(instr, c, cflags); \ + __ssa_src(instr, d, dflags); \ return instr; \ } @@ -1133,10 +1143,10 @@ ir3_##name##_##f(struct ir3_block *block, \ struct ir3_instruction *instr = \ ir3_instr_create2(block, OPC_##name, 5); \ ir3_reg_create(instr, 0, 0); /* dst */ \ - ir3_reg_create(instr, 0, IR3_REG_SSA | aflags)->instr = a; \ - ir3_reg_create(instr, 0, IR3_REG_SSA | bflags)->instr = b; \ - ir3_reg_create(instr, 0, IR3_REG_SSA | cflags)->instr = c; \ - ir3_reg_create(instr, 0, IR3_REG_SSA | dflags)->instr = d; \ + __ssa_src(instr, a, aflags); \ + __ssa_src(instr, b, bflags); \ + __ssa_src(instr, c, cflags); \ + __ssa_src(instr, d, dflags); \ instr->flags |= IR3_INSTR_##f; \ return instr; \ } diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c index 8644bc19218..1d6403b5260 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c @@ -472,6 +472,14 @@ get_src(struct ir3_context *ctx, nir_src *src) static void put_dst(struct ir3_context *ctx, nir_dest *dst) { + unsigned bit_size = nir_dest_bit_size(*dst); + + if (bit_size < 32) { + for (unsigned i = 0; i < ctx->last_dst_n; i++) { + ctx->last_dst[i]->regs[0]->flags |= IR3_REG_HALF; + } + } + if (!dst->is_ssa) { nir_register *reg = dst->reg.reg; struct ir3_array *arr = get_array(ctx, reg); -- 2.30.2