X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fpanfrost%2Fbifrost%2Fbi_pack.c;h=852dd07a1367052e85f0b11d156bf9afcb1b67b6;hb=79f30d8a86e9f9fe0f542c75f8ebf2e617f13135;hp=ea0319a0cd7d35287837432219795d5e72023a7e;hpb=c94d41ad7c92a9549e16f733dcb6a0a0762e811f;p=mesa.git diff --git a/src/panfrost/bifrost/bi_pack.c b/src/panfrost/bifrost/bi_pack.c index ea0319a0cd7..852dd07a136 100644 --- a/src/panfrost/bifrost/bi_pack.c +++ b/src/panfrost/bifrost/bi_pack.c @@ -22,6 +22,7 @@ */ #include "compiler.h" +#include "bi_print.h" #define RETURN_PACKED(str) { \ uint64_t temp = 0; \ @@ -47,6 +48,8 @@ bi_pack_header(bi_clause *clause, bi_clause *next, bool is_fragment) .scoreboard_index = clause->scoreboard_id, .clause_type = clause->clause_type, .next_clause_type = next ? next->clause_type : 0, + .suppress_inf = true, + .suppress_nan = true, }; header.branch_cond |= header.back_to_back; @@ -56,50 +59,6 @@ bi_pack_header(bi_clause *clause, bi_clause *next, bool is_fragment) return u; } -/* Represents the assignment of ports for a given bundle */ - -struct bi_registers { - /* Register to assign to each port */ - unsigned port[4]; - - /* Read ports can be disabled */ - bool enabled[2]; - - /* Should we write FMA? what about ADD? If only a single port is - * enabled it is in port 2, else ADD/FMA is 2/3 respectively */ - bool write_fma, write_add; - - /* Should we read with port 3? */ - bool read_port3; - - /* Packed uniform/constant */ - uint8_t uniform_constant; - - /* Whether writes are actually for the last instruction */ - bool first_instruction; -}; - -static inline void -bi_print_ports(struct bi_registers *regs) -{ - for (unsigned i = 0; i < 2; ++i) { - if (regs->enabled[i]) - printf("port %u: %u\n", i, regs->port[i]); - } - - if (regs->write_fma || regs->write_add) { - printf("port 2 (%s): %u\n", - regs->write_add ? 
"ADD" : "FMA", - regs->port[2]); - } - - if ((regs->write_fma && regs->write_add) || regs->read_port3) { - printf("port 3 (%s): %u\n", - regs->read_port3 ? "read" : "FMA", - regs->port[3]); - } -} - /* The uniform/constant slot allows loading a contiguous 64-bit immediate or * pushed uniform per bundle. Figure out which one we need in the bundle (the * scheduler needs to ensure we only have one type per bundle), validate @@ -168,6 +127,10 @@ bi_assign_uniform_constant_single( if (s == 0 && (ins->type == BI_LOAD_VAR_ADDRESS || ins->type == BI_LOAD_ATTR)) continue; if (ins->src[s] & BIR_INDEX_CONSTANT) { + /* Let direct addresses through */ + if (ins->type == BI_LOAD_VAR) + continue; + bool hi = false; bool b64 = nir_alu_type_get_type_size(ins->src_types[s]) > 32; uint64_t cons = bi_get_immediate(ins, s); @@ -195,6 +158,8 @@ bi_assign_uniform_constant_single( regs->uniform_constant = f; ins->src[s] = BIR_INDEX_PASS | BIFROST_SRC_CONST_LO; assigned = true; + } else if (ins->src[s] & BIR_INDEX_ZERO && fast_zero) { + ins->src[s] = BIR_INDEX_PASS | BIFROST_SRC_STAGE; } else if (s & BIR_INDEX_UNIFORM) { unreachable("Push uniforms not implemented yet"); } @@ -458,18 +423,18 @@ bi_get_src_reg_port(struct bi_registers *regs, unsigned src) } static enum bifrost_packed_src -bi_get_src(bi_instruction *ins, struct bi_registers *regs, unsigned s, bool is_fma) +bi_get_src(bi_instruction *ins, struct bi_registers *regs, unsigned s) { unsigned src = ins->src[s]; if (src & BIR_INDEX_REGISTER) return bi_get_src_reg_port(regs, src); - else if (src & BIR_INDEX_ZERO && is_fma) - return BIFROST_SRC_STAGE; else if (src & BIR_INDEX_PASS) return src & ~BIR_INDEX_PASS; - else - unreachable("Unknown src"); + else { + bi_print_instruction(ins, stderr); + unreachable("Unknown src in above instruction"); + } } /* Constructs a packed 2-bit swizzle for a 16-bit vec2 source. 
Source must be @@ -508,10 +473,10 @@ bi_pack_fma_fma(bi_instruction *ins, struct bi_registers *regs) bool flip_ab = ins->src_abs[1]; struct bifrost_fma_mscale pack = { - .src0 = bi_get_src(ins, regs, flip_ab ? 1 : 0, true), - .src1 = bi_get_src(ins, regs, flip_ab ? 0 : 1, true), - .src2 = bi_get_src(ins, regs, 2, true), - .src3 = bi_get_src(ins, regs, 3, true), + .src0 = bi_get_src(ins, regs, flip_ab ? 1 : 0), + .src1 = bi_get_src(ins, regs, flip_ab ? 0 : 1), + .src2 = bi_get_src(ins, regs, 2), + .src3 = bi_get_src(ins, regs, 3), .mscale_mode = 0, .mode = ins->outmod, .src0_abs = ins->src_abs[0] || ins->src_abs[1], @@ -523,9 +488,9 @@ bi_pack_fma_fma(bi_instruction *ins, struct bi_registers *regs) RETURN_PACKED(pack); } else if (ins->dest_type == nir_type_float32) { struct bifrost_fma_fma pack = { - .src0 = bi_get_src(ins, regs, 0, true), - .src1 = bi_get_src(ins, regs, 1, true), - .src2 = bi_get_src(ins, regs, 2, true), + .src0 = bi_get_src(ins, regs, 0), + .src1 = bi_get_src(ins, regs, 1), + .src2 = bi_get_src(ins, regs, 2), .src0_abs = ins->src_abs[0], .src1_abs = ins->src_abs[1], .src2_abs = ins->src_abs[2], @@ -539,9 +504,9 @@ bi_pack_fma_fma(bi_instruction *ins, struct bi_registers *regs) RETURN_PACKED(pack); } else if (ins->dest_type == nir_type_float16) { struct bifrost_fma_fma16 pack = { - .src0 = bi_get_src(ins, regs, 0, true), - .src1 = bi_get_src(ins, regs, 1, true), - .src2 = bi_get_src(ins, regs, 2, true), + .src0 = bi_get_src(ins, regs, 0), + .src1 = bi_get_src(ins, regs, 1), + .src2 = bi_get_src(ins, regs, 2), .swizzle_0 = bi_swiz16(ins, 0), .swizzle_1 = bi_swiz16(ins, 1), .swizzle_2 = bi_swiz16(ins, 2), @@ -567,8 +532,8 @@ bi_pack_fma_addmin_f32(bi_instruction *ins, struct bi_registers *regs) BIFROST_FMA_OP_FMAX32; struct bifrost_fma_add pack = { - .src0 = bi_get_src(ins, regs, 0, true), - .src1 = bi_get_src(ins, regs, 1, true), + .src0 = bi_get_src(ins, regs, 0), + .src1 = bi_get_src(ins, regs, 1), .src0_abs = ins->src_abs[0], .src1_abs = 
ins->src_abs[1], .src0_neg = ins->src_neg[0], @@ -582,16 +547,9 @@ bi_pack_fma_addmin_f32(bi_instruction *ins, struct bi_registers *regs) RETURN_PACKED(pack); } -static unsigned -bi_pack_fmadd_min_f16(bi_instruction *ins, struct bi_registers *regs, bool FMA) +static bool +bi_pack_fp16_abs(bi_instruction *ins, struct bi_registers *regs, bool *flip) { - unsigned op = - (!FMA) ? ((ins->op.minmax == BI_MINMAX_MIN) ? - BIFROST_ADD_OP_FMIN16 : BIFROST_ADD_OP_FMAX16) : - (ins->type == BI_ADD) ? BIFROST_FMA_OP_FADD16 : - (ins->op.minmax == BI_MINMAX_MIN) ? BIFROST_FMA_OP_FMIN16 : - BIFROST_FMA_OP_FMAX16; - /* Absolute values are packed in a quirky way. Let k = src1 < src0. Let * l be an auxiliary bit we encode. Then the hardware determines: * @@ -621,25 +579,40 @@ bi_pack_fmadd_min_f16(bi_instruction *ins, struct bi_registers *regs, bool FMA) */ unsigned abs_0 = ins->src_abs[0], abs_1 = ins->src_abs[1]; - unsigned src_0 = bi_get_src(ins, regs, 0, true); - unsigned src_1 = bi_get_src(ins, regs, 1, true); - bool l = false; - bool flip = false; + unsigned src_0 = bi_get_src(ins, regs, 0); + unsigned src_1 = bi_get_src(ins, regs, 1); - assert(!(abs_0 && abs_1)); + assert(!(abs_0 && abs_1 && src_0 == src_1)); if (!abs_0 && !abs_1) { /* Force k = 0 <===> NOT(src1 < src0) */ - flip = (src_1 < src_0); + *flip = (src_1 < src_0); + return false; } else if (abs_0 && !abs_1) { - l = src_1 >= src_0; + return src_1 >= src_0; } else if (abs_1 && !abs_0) { - flip = true; - l = src_0 >= src_1; + *flip = true; + return src_0 >= src_1; } else { - flip = (src_0 >= src_1); - l = true; + *flip = !(src_1 < src_0); + return true; } +} + +static unsigned +bi_pack_fmadd_min_f16(bi_instruction *ins, struct bi_registers *regs, bool FMA) +{ + unsigned op = + (!FMA) ? ((ins->op.minmax == BI_MINMAX_MIN) ? + BIFROST_ADD_OP_FMIN16 : BIFROST_ADD_OP_FMAX16) : + (ins->type == BI_ADD) ? BIFROST_FMA_OP_FADD16 : + (ins->op.minmax == BI_MINMAX_MIN) ? 
BIFROST_FMA_OP_FMIN16 : + BIFROST_FMA_OP_FMAX16; + + bool flip = false; + bool l = bi_pack_fp16_abs(ins, regs, &flip); + unsigned src_0 = bi_get_src(ins, regs, 0); + unsigned src_1 = bi_get_src(ins, regs, 1); if (FMA) { struct bifrost_fma_add_minmax16 pack = { @@ -647,6 +620,8 @@ bi_pack_fmadd_min_f16(bi_instruction *ins, struct bi_registers *regs, bool FMA) .src1 = flip ? src_0 : src_1, .src0_neg = ins->src_neg[flip ? 1 : 0], .src1_neg = ins->src_neg[flip ? 0 : 1], + .src0_swizzle = bi_swiz16(ins, flip ? 1 : 0), + .src1_swizzle = bi_swiz16(ins, flip ? 0 : 1), .abs1 = l, .outmod = ins->outmod, .mode = (ins->type == BI_ADD) ? ins->roundmode : ins->minmax, @@ -664,8 +639,8 @@ bi_pack_fmadd_min_f16(bi_instruction *ins, struct bi_registers *regs, bool FMA) .src0_neg = ins->src_neg[flip ? 1 : 0], .src1_neg = ins->src_neg[flip ? 0 : 1], .abs1 = l, - .src0_swizzle = bi_swiz16(ins, 0), - .src1_swizzle = bi_swiz16(ins, 1), + .src0_swizzle = bi_swiz16(ins, flip ? 1 : 0), + .src1_swizzle = bi_swiz16(ins, flip ? 0 : 1), .mode = ins->minmax, .op = op }; @@ -689,7 +664,7 @@ static unsigned bi_pack_fma_1src(bi_instruction *ins, struct bi_registers *regs, unsigned op) { struct bifrost_fma_inst pack = { - .src0 = bi_get_src(ins, regs, 0, true), + .src0 = bi_get_src(ins, regs, 0), .op = op }; @@ -700,8 +675,8 @@ static unsigned bi_pack_fma_2src(bi_instruction *ins, struct bi_registers *regs, unsigned op) { struct bifrost_fma_2src pack = { - .src0 = bi_get_src(ins, regs, 0, true), - .src1 = bi_get_src(ins, regs, 1, true), + .src0 = bi_get_src(ins, regs, 0), + .src1 = bi_get_src(ins, regs, 1), .op = op }; @@ -712,7 +687,7 @@ static unsigned bi_pack_add_1src(bi_instruction *ins, struct bi_registers *regs, unsigned op) { struct bifrost_add_inst pack = { - .src0 = bi_get_src(ins, regs, 0, true), + .src0 = bi_get_src(ins, regs, 0), .op = op }; @@ -782,10 +757,10 @@ bi_pack_fma_csel(bi_instruction *ins, struct bi_registers *regs) unsigned res_1 = (invert ? 
2 : 3); struct bifrost_csel4 pack = { - .src0 = bi_get_src(ins, regs, cmp_0, true), - .src1 = bi_get_src(ins, regs, cmp_1, true), - .src2 = bi_get_src(ins, regs, res_0, true), - .src3 = bi_get_src(ins, regs, res_1, true), + .src0 = bi_get_src(ins, regs, cmp_0), + .src1 = bi_get_src(ins, regs, cmp_1), + .src2 = bi_get_src(ins, regs, res_0), + .src3 = bi_get_src(ins, regs, res_1), .cond = cond, .op = (size == 16) ? BIFROST_FMA_OP_CSEL4_V16 : BIFROST_FMA_OP_CSEL4 @@ -842,13 +817,13 @@ bi_pack_convert(bi_instruction *ins, struct bi_registers *regs, bool FMA) if (from_size == 32 && to_size == 16 && from_base == nir_type_float && to_base == from_base) { /* TODO: second vectorized source? */ struct bifrost_fma_2src pfma = { - .src0 = bi_get_src(ins, regs, 0, true), + .src0 = bi_get_src(ins, regs, 0), .src1 = BIFROST_SRC_STAGE, /* 0 */ .op = BIFROST_FMA_FLOAT32_TO_16 }; struct bifrost_add_2src padd = { - .src0 = bi_get_src(ins, regs, 0, true), + .src0 = bi_get_src(ins, regs, 0), .src1 = BIFROST_SRC_STAGE, /* 0 */ .op = BIFROST_ADD_FLOAT32_TO_16 }; @@ -942,10 +917,10 @@ bi_pack_fma_select(bi_instruction *ins, struct bi_registers *regs) } struct bifrost_fma_sel8 pack = { - .src0 = bi_get_src(ins, regs, 0, true), - .src1 = bi_get_src(ins, regs, 1, true), - .src2 = bi_get_src(ins, regs, 2, true), - .src3 = bi_get_src(ins, regs, 3, true), + .src0 = bi_get_src(ins, regs, 0), + .src1 = bi_get_src(ins, regs, 1), + .src2 = bi_get_src(ins, regs, 2), + .src3 = bi_get_src(ins, regs, 3), .swizzle = swiz, .op = BIFROST_FMA_OP_SEL8 }; @@ -1019,8 +994,8 @@ bi_pack_fma_cmp(bi_instruction *ins, struct bi_registers *regs) cond = bi_flip_fcmp(cond); struct bifrost_fma_fcmp pack = { - .src0 = bi_get_src(ins, regs, 0, true), - .src1 = bi_get_src(ins, regs, 1, true), + .src0 = bi_get_src(ins, regs, 0), + .src1 = bi_get_src(ins, regs, 1), .src0_abs = ins->src_abs[0], .src1_abs = ins->src_abs[1], .src1_neg = neg, @@ -1030,12 +1005,121 @@ bi_pack_fma_cmp(bi_instruction *ins, struct bi_registers 
*regs) .op = BIFROST_FMA_OP_FCMP_GL }; + RETURN_PACKED(pack); + } else if (Tl == nir_type_float16 && Tr == nir_type_float16) { + bool flip = false; + bool l = bi_pack_fp16_abs(ins, regs, &flip); + enum bifrost_fcmp_cond cond = bi_fcmp_cond(ins->cond); + + if (flip) + cond = bi_flip_fcmp(cond); + + struct bifrost_fma_fcmp16 pack = { + .src0 = bi_get_src(ins, regs, flip ? 1 : 0), + .src1 = bi_get_src(ins, regs, flip ? 0 : 1), + .src0_swizzle = bi_swiz16(ins, flip ? 1 : 0), + .src1_swizzle = bi_swiz16(ins, flip ? 0 : 1), + .abs1 = l, + .unk = 0, + .cond = cond, + .op = BIFROST_FMA_OP_FCMP_GL_16, + }; + RETURN_PACKED(pack); } else { unreachable("Unknown cmp type"); } } + +static unsigned +bi_fma_bitwise_op(enum bi_bitwise_op op, bool rshift) +{ + switch (op) { + case BI_BITWISE_OR: + /* Via De Morgan's */ + return rshift ? + BIFROST_FMA_OP_RSHIFT_NAND : + BIFROST_FMA_OP_LSHIFT_NAND; + case BI_BITWISE_AND: + return rshift ? + BIFROST_FMA_OP_RSHIFT_AND : + BIFROST_FMA_OP_LSHIFT_AND; + case BI_BITWISE_XOR: + /* Shift direction handled out of band */ + return BIFROST_FMA_OP_RSHIFT_XOR; + default: + unreachable("Unknown op"); + } +} +static unsigned +bi_pack_fma_bitwise(bi_instruction *ins, struct bi_registers *regs) +{ + unsigned size = nir_alu_type_get_type_size(ins->dest_type); + assert(size <= 32); + + bool invert_0 = ins->bitwise.src_invert[0]; + bool invert_1 = ins->bitwise.src_invert[1]; + + if (ins->op.bitwise == BI_BITWISE_OR) { + /* Becomes NAND, so via De Morgan's: + * f(A) | f(B) = ~(~f(A) & ~f(B)) + * = NAND(~f(A), ~f(B)) + */ + + invert_0 = !invert_0; + invert_1 = !invert_1; + } else if (ins->op.bitwise == BI_BITWISE_XOR) { + /* ~A ^ ~B = ~(A ^ ~B) = ~(~(A ^ B)) = A ^ B + * ~A ^ B = ~(A ^ B) = A ^ ~B + */ + + invert_0 ^= invert_1; + invert_1 = false; + + /* invert_1 ends up specifying shift direction */ + invert_1 = !ins->bitwise.rshift; + } + + struct bifrost_shift_fma pack = { + .src0 = bi_get_src(ins, regs, 0), + .src1 = bi_get_src(ins, regs, 1), + .src2 = 
bi_get_src(ins, regs, 2), + .half = (size == 32) ? 0 : (size == 16) ? 0x7 : (size == 8) ? 0x4 : 0, + .unk = 1, /* XXX */ + .invert_1 = invert_0, + .invert_2 = invert_1, + .op = bi_fma_bitwise_op(ins->op.bitwise, ins->bitwise.rshift) + }; + + RETURN_PACKED(pack); +} + +static unsigned +bi_pack_fma_round(bi_instruction *ins, struct bi_registers *regs) +{ + bool fp16 = ins->dest_type == nir_type_float16; + assert(fp16 || ins->dest_type == nir_type_float32); + + unsigned op = fp16 + ? BIFROST_FMA_ROUND_16(ins->roundmode, bi_swiz16(ins, 0)) + : BIFROST_FMA_ROUND_32(ins->roundmode); + + return bi_pack_fma_1src(ins, regs, op); +} + +static unsigned +bi_pack_fma_imath(bi_instruction *ins, struct bi_registers *regs) +{ + /* Scheduler: only ADD can have 8/16-bit imath */ + assert(ins->dest_type == nir_type_int32 || ins->dest_type == nir_type_uint32); + + unsigned op = ins->op.imath == BI_IMATH_ADD + ? BIFROST_FMA_IADD_32 + : BIFROST_FMA_ISUB_32; + + return bi_pack_fma_2src(ins, regs, op); +} static unsigned bi_pack_fma(bi_clause *clause, bi_bundle bundle, struct bi_registers *regs) @@ -1049,7 +1133,7 @@ bi_pack_fma(bi_clause *clause, bi_bundle bundle, struct bi_registers *regs) case BI_CMP: return bi_pack_fma_cmp(bundle.fma, regs); case BI_BITWISE: - return BIFROST_FMA_NOP; + return bi_pack_fma_bitwise(bundle.fma, regs); case BI_CONVERT: return bi_pack_convert(bundle.fma, regs, true); case BI_CSEL: @@ -1058,18 +1142,18 @@ bi_pack_fma(bi_clause *clause, bi_bundle bundle, struct bi_registers *regs) return bi_pack_fma_fma(bundle.fma, regs); case BI_FREXP: return bi_pack_fma_frexp(bundle.fma, regs); - case BI_ISUB: - return BIFROST_FMA_NOP; + case BI_IMATH: + return bi_pack_fma_imath(bundle.fma, regs); case BI_MINMAX: return bi_pack_fma_addmin(bundle.fma, regs); case BI_MOV: return bi_pack_fma_1src(bundle.fma, regs, BIFROST_FMA_OP_MOV); case BI_SHIFT: - return BIFROST_FMA_NOP; + unreachable("Packing todo"); case BI_SELECT: return bi_pack_fma_select(bundle.fma, regs); case 
BI_ROUND: - return BIFROST_FMA_NOP; + return bi_pack_fma_round(bundle.fma, regs); case BI_REDUCE_FMA: return bi_pack_fma_reduce(bundle.fma, regs); default: @@ -1092,10 +1176,9 @@ bi_pack_add_ld_vary(bi_clause *clause, bi_instruction *ins, struct bi_registers if (ins->src[0] & BIR_INDEX_CONSTANT) { /* Direct uses address field directly */ packed_addr = bi_get_immediate(ins, 0); - assert(packed_addr < 0b1000); } else { /* Indirect gets an extra source */ - packed_addr = bi_get_src(ins, regs, 0, false) | 0b11000; + packed_addr = bi_get_src(ins, regs, 0) | 0b11000; } /* The destination is thrown in the data register */ @@ -1106,7 +1189,7 @@ bi_pack_add_ld_vary(bi_clause *clause, bi_instruction *ins, struct bi_registers assert(channels >= 1 && channels <= 4); struct bifrost_ld_var pack = { - .src0 = bi_get_src(ins, regs, 1, false), + .src0 = bi_get_src(ins, regs, 1), .addr = packed_addr, .channels = MALI_POSITIVE(channels), .interp_mode = ins->load_vary.interp_mode, @@ -1122,8 +1205,8 @@ static unsigned bi_pack_add_2src(bi_instruction *ins, struct bi_registers *regs, unsigned op) { struct bifrost_add_2src pack = { - .src0 = bi_get_src(ins, regs, 0, true), - .src1 = bi_get_src(ins, regs, 1, true), + .src0 = bi_get_src(ins, regs, 0), + .src1 = bi_get_src(ins, regs, 1), .op = op }; @@ -1139,8 +1222,8 @@ bi_pack_add_addmin_f32(bi_instruction *ins, struct bi_registers *regs) BIFROST_ADD_OP_FMAX32; struct bifrost_add_faddmin pack = { - .src0 = bi_get_src(ins, regs, 0, true), - .src1 = bi_get_src(ins, regs, 1, true), + .src0 = bi_get_src(ins, regs, 0), + .src1 = bi_get_src(ins, regs, 1), .src0_abs = ins->src_abs[0], .src1_abs = ins->src_abs[1], .src0_neg = ins->src_neg[0], @@ -1160,8 +1243,8 @@ bi_pack_add_add_f16(bi_instruction *ins, struct bi_registers *regs) assert(ins->outmod == BIFROST_NONE); struct bifrost_add_faddmin pack = { - .src0 = bi_get_src(ins, regs, 0, true), - .src1 = bi_get_src(ins, regs, 1, true), + .src0 = bi_get_src(ins, regs, 0), + .src1 = bi_get_src(ins, 
regs, 1), .src0_abs = ins->src_abs[0], .src1_abs = ins->src_abs[1], .src0_neg = ins->src_neg[0], @@ -1221,8 +1304,8 @@ static unsigned bi_pack_add_ld_var_addr(bi_clause *clause, bi_instruction *ins, struct bi_registers *regs) { struct bifrost_ld_var_addr pack = { - .src0 = bi_get_src(ins, regs, 1, false), - .src1 = bi_get_src(ins, regs, 2, false), + .src0 = bi_get_src(ins, regs, 1), + .src1 = bi_get_src(ins, regs, 2), .location = bi_get_immediate(ins, 0), .type = bi_pack_ldst_type(ins->src_types[3]), .op = BIFROST_ADD_OP_LD_VAR_ADDR @@ -1238,8 +1321,8 @@ bi_pack_add_ld_attr(bi_clause *clause, bi_instruction *ins, struct bi_registers assert(ins->vector_channels >= 0 && ins->vector_channels <= 4); struct bifrost_ld_attr pack = { - .src0 = bi_get_src(ins, regs, 1, false), - .src1 = bi_get_src(ins, regs, 2, false), + .src0 = bi_get_src(ins, regs, 1), + .src1 = bi_get_src(ins, regs, 2), .location = bi_get_immediate(ins, 0), .channels = MALI_POSITIVE(ins->vector_channels), .type = bi_pack_ldst_type(ins->dest_type), @@ -1256,9 +1339,9 @@ bi_pack_add_st_vary(bi_clause *clause, bi_instruction *ins, struct bi_registers assert(ins->vector_channels >= 1 && ins->vector_channels <= 4); struct bifrost_st_vary pack = { - .src0 = bi_get_src(ins, regs, 1, false), - .src1 = bi_get_src(ins, regs, 2, false), - .src2 = bi_get_src(ins, regs, 3, false), + .src0 = bi_get_src(ins, regs, 1), + .src1 = bi_get_src(ins, regs, 2), + .src2 = bi_get_src(ins, regs, 3), .channels = MALI_POSITIVE(ins->vector_channels), .op = BIFROST_ADD_OP_ST_VAR }; @@ -1273,8 +1356,8 @@ bi_pack_add_atest(bi_clause *clause, bi_instruction *ins, struct bi_registers *r bool fp16 = (ins->src_types[1] == nir_type_float16); struct bifrost_add_atest pack = { - .src0 = bi_get_src(ins, regs, 0, false), - .src1 = bi_get_src(ins, regs, 1, false), + .src0 = bi_get_src(ins, regs, 0), + .src1 = bi_get_src(ins, regs, 1), .half = fp16, .component = fp16 ? 
ins->swizzle[1][0] : 1, /* Set for fp32 */ .op = BIFROST_ADD_OP_ATEST, @@ -1291,7 +1374,7 @@ static unsigned bi_pack_add_blend(bi_clause *clause, bi_instruction *ins, struct bi_registers *regs) { struct bifrost_add_inst pack = { - .src0 = bi_get_src(ins, regs, 1, false), + .src0 = bi_get_src(ins, regs, 1), .op = BIFROST_ADD_OP_BLEND }; @@ -1340,18 +1423,19 @@ bi_pack_add_table(bi_instruction *ins, struct bi_registers *regs) return bi_pack_add_1src(ins, regs, op); } static unsigned -bi_pack_add_tex_compact(bi_clause *clause, bi_instruction *ins, struct bi_registers *regs) +bi_pack_add_tex_compact(bi_clause *clause, bi_instruction *ins, struct bi_registers *regs, gl_shader_stage stage) { bool f16 = ins->dest_type == nir_type_float16; + bool vtx = stage != MESA_SHADER_FRAGMENT; struct bifrost_tex_compact pack = { - .src0 = bi_get_src(ins, regs, 0, false), - .src1 = bi_get_src(ins, regs, 1, false), - .op = f16 ? BIFROST_ADD_OP_TEX_COMPACT_F16 : - BIFROST_ADD_OP_TEX_COMPACT_F32, - .unknown = 1, - .tex_index = 0, - .sampler_index = 0 + .src0 = bi_get_src(ins, regs, 0), + .src1 = bi_get_src(ins, regs, 1), + .op = f16 ? 
BIFROST_ADD_OP_TEX_COMPACT_F16(vtx) : + BIFROST_ADD_OP_TEX_COMPACT_F32(vtx), + .compute_lod = !vtx, + .tex_index = ins->texture.texture_index, + .sampler_index = ins->texture.sampler_index }; bi_write_data_register(clause, ins); @@ -1369,8 +1453,165 @@ bi_pack_add_select(bi_instruction *ins, struct bi_registers *regs) return bi_pack_add_2src(ins, regs, op); } +static enum bifrost_discard_cond +bi_cond_to_discard(enum bi_cond cond, bool *flip) +{ + switch (cond){ + case BI_COND_GT: + *flip = true; + /* fallthrough */ + case BI_COND_LT: + return BIFROST_DISCARD_FLT; + case BI_COND_GE: + *flip = true; + /* fallthrough */ + case BI_COND_LE: + return BIFROST_DISCARD_FLE; + case BI_COND_NE: + return BIFROST_DISCARD_FNE; + case BI_COND_EQ: + return BIFROST_DISCARD_FEQ; + default: + unreachable("Invalid op for discard"); + } +} + +static unsigned +bi_pack_add_discard(bi_instruction *ins, struct bi_registers *regs) +{ + bool fp16 = ins->src_types[0] == nir_type_float16; + assert(fp16 || ins->src_types[0] == nir_type_float32); + + bool flip = false; + enum bifrost_discard_cond cond = bi_cond_to_discard(ins->cond, &flip); + + struct bifrost_add_discard pack = { + .src0 = bi_get_src(ins, regs, flip ? 1 : 0), + .src1 = bi_get_src(ins, regs, flip ? 0 : 1), + .cond = cond, + .src0_select = fp16 ? ins->swizzle[0][0] : 0, + .src1_select = fp16 ? ins->swizzle[1][0] : 0, + .fp32 = fp16 ? 0 : 1, + .op = BIFROST_ADD_OP_DISCARD + }; + + RETURN_PACKED(pack); +} + +static enum bifrost_icmp_cond +bi_cond_to_icmp(enum bi_cond cond, bool *flip, bool is_unsigned, bool is_16) +{ + switch (cond){ + case BI_COND_LT: + *flip = true; + /* fallthrough */ + case BI_COND_GT: + return is_unsigned ? (is_16 ? BIFROST_ICMP_IGE : BIFROST_ICMP_UGT) + : BIFROST_ICMP_IGT; + case BI_COND_LE: + *flip = true; + /* fallthrough */ + case BI_COND_GE: + return is_unsigned ? BIFROST_ICMP_UGE : + (is_16 ? 
BIFROST_ICMP_UGT : BIFROST_ICMP_IGE); + case BI_COND_NE: + return BIFROST_ICMP_NEQ; + case BI_COND_EQ: + return BIFROST_ICMP_EQ; + default: + unreachable("Invalid op for icmp"); + } +} + +static unsigned +bi_pack_add_icmp32(bi_instruction *ins, struct bi_registers *regs, bool flip, + enum bifrost_icmp_cond cond) +{ + struct bifrost_add_icmp pack = { + .src0 = bi_get_src(ins, regs, flip ? 1 : 0), + .src1 = bi_get_src(ins, regs, flip ? 0 : 1), + .cond = cond, + .sz = 1, + .d3d = false, + .op = BIFROST_ADD_OP_ICMP_32 + }; + + RETURN_PACKED(pack); +} + +static unsigned +bi_pack_add_icmp16(bi_instruction *ins, struct bi_registers *regs, bool flip, + enum bifrost_icmp_cond cond) +{ + struct bifrost_add_icmp16 pack = { + .src0 = bi_get_src(ins, regs, flip ? 1 : 0), + .src1 = bi_get_src(ins, regs, flip ? 0 : 1), + .src0_swizzle = bi_swiz16(ins, flip ? 1 : 0), + .src1_swizzle = bi_swiz16(ins, flip ? 0 : 1), + .cond = cond, + .d3d = false, + .op = BIFROST_ADD_OP_ICMP_16 + }; + + RETURN_PACKED(pack); +} + +static unsigned +bi_pack_add_cmp(bi_instruction *ins, struct bi_registers *regs) +{ + nir_alu_type Tl = ins->src_types[0]; + nir_alu_type Tr = ins->src_types[1]; + nir_alu_type Bl = nir_alu_type_get_base_type(Tl); + + if (Bl == nir_type_uint || Bl == nir_type_int) { + assert(Tl == Tr); + unsigned sz = nir_alu_type_get_type_size(Tl); + + bool flip = false; + + enum bifrost_icmp_cond cond = bi_cond_to_icmp( + sz == 16 ? 
/*bi_invert_cond*/(ins->cond) : ins->cond, + &flip, Bl == nir_type_uint, sz == 16); + + if (sz == 32) + return bi_pack_add_icmp32(ins, regs, flip, cond); + else if (sz == 16) + return bi_pack_add_icmp16(ins, regs, flip, cond); + else + unreachable("TODO"); + } else { + unreachable("TODO"); + } +} + +static unsigned +bi_pack_add_imath(bi_instruction *ins, struct bi_registers *regs) +{ + /* TODO: 32+16 add */ + assert(ins->src_types[0] == ins->src_types[1]); + unsigned sz = nir_alu_type_get_type_size(ins->src_types[0]); + enum bi_imath_op p = ins->op.imath; + + unsigned op = 0; + + if (sz == 8) { + op = (p == BI_IMATH_ADD) ? BIFROST_ADD_IADD_8 : + BIFROST_ADD_ISUB_8; + } else if (sz == 16) { + op = (p == BI_IMATH_ADD) ? BIFROST_ADD_IADD_16 : + BIFROST_ADD_ISUB_16; + } else if (sz == 32) { + op = (p == BI_IMATH_ADD) ? BIFROST_ADD_IADD_32 : + BIFROST_ADD_ISUB_32; + } else { + unreachable("64-bit todo"); + } + + return bi_pack_add_2src(ins, regs, op); +} + static unsigned -bi_pack_add(bi_clause *clause, bi_bundle bundle, struct bi_registers *regs) +bi_pack_add(bi_clause *clause, bi_bundle bundle, struct bi_registers *regs, gl_shader_stage stage) { if (!bundle.add) return BIFROST_ADD_NOP; @@ -1381,19 +1622,23 @@ bi_pack_add(bi_clause *clause, bi_bundle bundle, struct bi_registers *regs) case BI_ATEST: return bi_pack_add_atest(clause, bundle.add, regs); case BI_BRANCH: + unreachable("Packing todo"); case BI_CMP: - return BIFROST_ADD_NOP; + return bi_pack_add_cmp(bundle.add, regs); case BI_BLEND: return bi_pack_add_blend(clause, bundle.add, regs); case BI_BITWISE: - return BIFROST_ADD_NOP; + unreachable("Packing todo"); case BI_CONVERT: return bi_pack_convert(bundle.add, regs, false); case BI_DISCARD: + return bi_pack_add_discard(bundle.add, regs); case BI_FREXP: - case BI_ISUB: + unreachable("Packing todo"); + case BI_IMATH: + return bi_pack_add_imath(bundle.add, regs); case BI_LOAD: - return BIFROST_ADD_NOP; + unreachable("Packing todo"); case BI_LOAD_ATTR: return 
bi_pack_add_ld_attr(clause, bundle.add, regs); case BI_LOAD_UNIFORM: @@ -1407,7 +1652,7 @@ bi_pack_add(bi_clause *clause, bi_bundle bundle, struct bi_registers *regs) case BI_MOV: case BI_SHIFT: case BI_STORE: - return BIFROST_ADD_NOP; + unreachable("Packing todo"); case BI_STORE_VAR: return bi_pack_add_st_vary(clause, bundle.add, regs); case BI_SPECIAL: @@ -1418,11 +1663,11 @@ bi_pack_add(bi_clause *clause, bi_bundle bundle, struct bi_registers *regs) return bi_pack_add_select(bundle.add, regs); case BI_TEX: if (bundle.add->op.texture == BI_TEX_COMPACT) - return bi_pack_add_tex_compact(clause, bundle.add, regs); + return bi_pack_add_tex_compact(clause, bundle.add, regs, stage); else unreachable("Unknown tex type"); case BI_ROUND: - return BIFROST_ADD_NOP; + unreachable("Packing todo"); default: unreachable("Cannot encode class as ADD"); } @@ -1434,7 +1679,7 @@ struct bi_packed_bundle { }; static struct bi_packed_bundle -bi_pack_bundle(bi_clause *clause, bi_bundle bundle, bi_bundle prev, bool first_bundle) +bi_pack_bundle(bi_clause *clause, bi_bundle bundle, bi_bundle prev, bool first_bundle, gl_shader_stage stage) { struct bi_registers regs = bi_assign_ports(bundle, prev); bi_assign_uniform_constant(clause, &regs, bundle); @@ -1442,7 +1687,7 @@ bi_pack_bundle(bi_clause *clause, bi_bundle bundle, bi_bundle prev, bool first_b uint64_t reg = bi_pack_registers(regs); uint64_t fma = bi_pack_fma(clause, bundle, &regs); - uint64_t add = bi_pack_add(clause, bundle, &regs); + uint64_t add = bi_pack_add(clause, bundle, &regs, stage); struct bi_packed_bundle packed = { .lo = reg | (fma << 35) | ((add & 0b111111) << 58), @@ -1491,9 +1736,9 @@ bi_pack_constants(bi_context *ctx, bi_clause *clause, static void bi_pack_clause(bi_context *ctx, bi_clause *clause, bi_clause *next, - struct util_dynarray *emission) + struct util_dynarray *emission, gl_shader_stage stage) { - struct bi_packed_bundle ins_1 = bi_pack_bundle(clause, clause->bundles[0], clause->bundles[0], true); + struct 
bi_packed_bundle ins_1 = bi_pack_bundle(clause, clause->bundles[0], clause->bundles[0], true, stage); assert(clause->bundle_count == 1); /* Used to decide if we elide writes */ @@ -1550,7 +1795,7 @@ bi_pack(bi_context *ctx, struct util_dynarray *emission) bi_foreach_clause_in_block(block, clause) { bi_clause *next = bi_next_clause(ctx, _block, clause); - bi_pack_clause(ctx, clause, next, emission); + bi_pack_clause(ctx, clause, next, emission, ctx->stage); } } }