From 130a3fba1c744dce5c052840b4f78437d4c73bf8 Mon Sep 17 00:00:00 2001
From: Alyssa Rosenzweig
Date: Thu, 30 Apr 2020 18:15:09 -0400
Subject: [PATCH] pan/bi: Pack round opcodes (FMA, either 16 or 32)

Signed-off-by: Alyssa Rosenzweig
Part-of:
---
/*
 * Review notes. Everything between the "---" line above and the first
 * "diff --git" line below is commentary; it is neither part of the commit
 * message nor of the changes applied to the tree.
 *
 * bi_pack.c:
 *   - bi_pack_fma_round() is new. It treats the instruction as fp16 when
 *     ins->dest_type is nir_type_float16 and asserts that the type is
 *     otherwise nir_type_float32. The opcode is built with
 *     BIFROST_FMA_ROUND_16(ins->roundmode, bi_swiz16(ins, 0)) for fp16 and
 *     with BIFROST_FMA_ROUND_32(ins->roundmode) for fp32, then handed to
 *     bi_pack_fma_1src(), whose result is returned unchanged.
 *   - In bi_pack_fma(), the BI_ROUND case now returns bi_pack_fma_round()
 *     instead of BIFROST_FMA_NOP.
 *
 * bifrost.h:
 *   - Both new macros OR BIFROST_FMA_EXT with a base value (0x1800 for the
 *     16-bit form, 0x1805 for the 32-bit form) and with the mode shifted
 *     left by 6; the 16-bit form additionally ORs in swiz.
 *   - NOTE(review): unlike the neighbouring ADD_FREXPM and SEL_16 macros,
 *     the new ones are not shifted right by 3. Presumably that matches what
 *     bi_pack_fma_1src() expects -- confirm against its definition.
 *
 * NOTE(review): this copy of the patch had its line breaks collapsed. The
 * line structure below is reconstructed; indentation, intra-line alignment
 * and the exact position of blank context lines could not be recovered and
 * are assumed. Verify the context lines against the target tree (or apply
 * with whitespace tolerance) before relying on it.
 *
 * NOTE(review): the From/Signed-off-by lines carry no e-mail address and
 * Part-of: has no value in this copy; presumably angle-bracketed text was
 * stripped. Restore them from the original commit if they are needed.
 */
 src/panfrost/bifrost/bi_pack.c | 15 ++++++++++++++-
 src/panfrost/bifrost/bifrost.h |  3 +++
 2 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/src/panfrost/bifrost/bi_pack.c b/src/panfrost/bifrost/bi_pack.c
index 5cd902da250..6f1b6dc7c53 100644
--- a/src/panfrost/bifrost/bi_pack.c
+++ b/src/panfrost/bifrost/bi_pack.c
@@ -1127,6 +1127,19 @@ bi_pack_fma_bitwise(bi_instruction *ins, struct bi_registers *regs)
 
         RETURN_PACKED(pack);
 }
+
+static unsigned
+bi_pack_fma_round(bi_instruction *ins, struct bi_registers *regs)
+{
+        bool fp16 = ins->dest_type == nir_type_float16;
+        assert(fp16 || ins->dest_type == nir_type_float32);
+
+        unsigned op = fp16
+                ? BIFROST_FMA_ROUND_16(ins->roundmode, bi_swiz16(ins, 0))
+                : BIFROST_FMA_ROUND_32(ins->roundmode);
+
+        return bi_pack_fma_1src(ins, regs, op);
+}
 
 static unsigned
 bi_pack_fma(bi_clause *clause, bi_bundle bundle, struct bi_registers *regs)
@@ -1160,7 +1173,7 @@ bi_pack_fma(bi_clause *clause, bi_bundle bundle, struct bi_registers *regs)
         case BI_SELECT:
                 return bi_pack_fma_select(bundle.fma, regs);
         case BI_ROUND:
-                return BIFROST_FMA_NOP;
+                return bi_pack_fma_round(bundle.fma, regs);
         case BI_REDUCE_FMA:
                 return bi_pack_fma_reduce(bundle.fma, regs);
         default:
diff --git a/src/panfrost/bifrost/bifrost.h b/src/panfrost/bifrost/bifrost.h
index bfbaa2c2e56..ad2b2ac01b7 100644
--- a/src/panfrost/bifrost/bifrost.h
+++ b/src/panfrost/bifrost/bifrost.h
@@ -101,6 +101,9 @@ enum bifrost_packed_src {
 #define BIFROST_FMA_OP_ADD_FREXPM ((BIFROST_FMA_EXT | 0x1e80) >> 3)
 
 #define BIFROST_FMA_SEL_16(swiz) (((BIFROST_FMA_EXT | 0x1e00) >> 3) | (swiz))
+#define BIFROST_FMA_ROUND_16(mode, swiz) (BIFROST_FMA_EXT | 0x1800 | (swiz) | ((mode) << 6))
+#define BIFROST_FMA_ROUND_32(mode) (BIFROST_FMA_EXT | 0x1805 | ((mode) << 6))
+
 struct bifrost_fma_inst {
         unsigned src0 : 3;
         unsigned op : 20;
-- 
2.30.2