+bi_pack_add_discard(bi_instruction *ins, bi_registers *regs)
+{
+ bool fp16 = ins->src_types[0] == nir_type_float16;
+ assert(fp16 || ins->src_types[0] == nir_type_float32);
+
+ bool flip = false;
+ enum bifrost_discard_cond cond = bi_cond_to_discard(ins->cond, &flip);
+
+ struct bifrost_add_discard pack = {
+ .src0 = bi_get_src(ins, regs, flip ? 1 : 0),
+ .src1 = bi_get_src(ins, regs, flip ? 0 : 1),
+ .cond = cond,
+ .src0_select = fp16 ? ins->swizzle[0][0] : 0,
+ .src1_select = fp16 ? ins->swizzle[1][0] : 0,
+ .fp32 = fp16 ? 0 : 1,
+ .op = BIFROST_ADD_OP_DISCARD
+ };
+
+ RETURN_PACKED(pack);
+}
+
+/**
+ * Translate an IR comparison condition into the condition encoding used by
+ * the ADD-unit integer compare.
+ *
+ * Only GT/GE/NE/EQ style encodings are returned. LT and LE are mapped onto
+ * the GT and GE encodings respectively, and *flip is set to true so that the
+ * caller exchanges its two sources. *flip is not written for any other
+ * condition, so the caller must initialise it beforehand.
+ *
+ * is_unsigned selects between the signed and unsigned encodings. is_16
+ * changes the choice for unsigned GT (IGE) and signed GE (UGT).
+ * NOTE(review): presumably this mirrors how the 16-bit compare encodes its
+ * conditions -- confirm against the ISA notes before "fixing" it.
+ */
+static enum bifrost_icmp_cond
+bi_cond_to_icmp(enum bi_cond cond, bool *flip, bool is_unsigned, bool is_16)
+{
+        /* Less-than forms reuse the greater-than encodings with the sources
+         * exchanged by the caller */
+        if (cond == BI_COND_LT || cond == BI_COND_LE)
+                *flip = true;
+
+        switch (cond) {
+        case BI_COND_LT:
+        case BI_COND_GT:
+                if (!is_unsigned)
+                        return BIFROST_ICMP_IGT;
+
+                return is_16 ? BIFROST_ICMP_IGE : BIFROST_ICMP_UGT;
+
+        case BI_COND_LE:
+        case BI_COND_GE:
+                if (is_unsigned)
+                        return BIFROST_ICMP_UGE;
+
+                return is_16 ? BIFROST_ICMP_UGT : BIFROST_ICMP_IGE;
+
+        case BI_COND_NE:
+                return BIFROST_ICMP_NEQ;
+
+        case BI_COND_EQ:
+                return BIFROST_ICMP_EQ;
+
+        default:
+                unreachable("Invalid op for icmp");
+        }
+}
+
+/**
+ * Pack a 32-bit integer compare for the ADD unit.
+ *
+ * cond is the already-translated hardware condition. When flip is set, the
+ * instruction's two sources are exchanged before being encoded.
+ */
+static unsigned
+bi_pack_add_icmp32(bi_instruction *ins, bi_registers *regs, bool flip,
+                   enum bifrost_icmp_cond cond)
+{
+        /* IR source indices feeding the hardware src0 / src1 fields */
+        int first = flip ? 1 : 0;
+        int second = 1 - first;
+
+        unsigned enc_src0 = bi_get_src(ins, regs, first);
+        unsigned enc_src1 = bi_get_src(ins, regs, second);
+
+        struct bifrost_add_icmp pack = {
+                .src0 = enc_src0,
+                .src1 = enc_src1,
+                .cond = cond,
+                .sz = 1,
+                /* NOTE(review): presumably selects the D3D-style result
+                 * convention -- confirm against the struct definition */
+                .d3d = true,
+                .op = BIFROST_ADD_OP_ICMP_32
+        };
+
+        RETURN_PACKED(pack);
+}
+
+/**
+ * Pack a 16-bit integer compare for the ADD unit.
+ *
+ * cond is the already-translated hardware condition. When flip is set, the
+ * two sources are exchanged, and each source's swizzle follows it.
+ */
+static unsigned
+bi_pack_add_icmp16(bi_instruction *ins, bi_registers *regs, bool flip,
+                   enum bifrost_icmp_cond cond)
+{
+        /* IR source indices feeding the hardware src0 / src1 fields */
+        int first = flip ? 1 : 0;
+        int second = 1 - first;
+
+        unsigned enc_src0 = bi_get_src(ins, regs, first);
+        unsigned enc_src1 = bi_get_src(ins, regs, second);
+        unsigned swz_src0 = bi_swiz16(ins, first);
+        unsigned swz_src1 = bi_swiz16(ins, second);
+
+        struct bifrost_add_icmp16 pack = {
+                .src0 = enc_src0,
+                .src1 = enc_src1,
+                .src0_swizzle = swz_src0,
+                .src1_swizzle = swz_src1,
+                .cond = cond,
+                /* NOTE(review): presumably selects the D3D-style result
+                 * convention -- confirm against the struct definition */
+                .d3d = true,
+                .op = BIFROST_ADD_OP_ICMP_16
+        };
+
+        RETURN_PACKED(pack);
+}
+
+/**
+ * Pack a compare instruction for the ADD unit.
+ *
+ * Only integer compares (signed or unsigned) of 16 or 32 bits are handled;
+ * both sources must have the same type. Any other base type or size hits
+ * unreachable("TODO").
+ */
+static unsigned
+bi_pack_add_cmp(bi_instruction *ins, bi_registers *regs)
+{
+        nir_alu_type type_l = ins->src_types[0];
+        nir_alu_type base_l = nir_alu_type_get_base_type(type_l);
+
+        /* Non-integer compares are not implemented here yet */
+        if (base_l != nir_type_uint && base_l != nir_type_int)
+                unreachable("TODO");
+
+        assert(type_l == ins->src_types[1]);
+
+        unsigned bits = nir_alu_type_get_type_size(type_l);
+        bool swap = false;
+
+        /* The condition is passed through unchanged for every size.
+         * NOTE(review): 16-bit compares may want bi_invert_cond() applied
+         * here; it is not applied at present -- confirm. */
+        enum bifrost_icmp_cond hw_cond =
+                bi_cond_to_icmp(ins->cond, &swap,
+                                base_l == nir_type_uint, bits == 16);
+
+        switch (bits) {
+        case 32:
+                return bi_pack_add_icmp32(ins, regs, swap, hw_cond);
+        case 16:
+                return bi_pack_add_icmp16(ins, regs, swap, hw_cond);
+        default:
+                unreachable("TODO");
+        }
+}
+
+/**
+ * Pack an integer add/subtract for the ADD unit.
+ *
+ * Both sources must share one type. The opcode is chosen from the source
+ * size (8, 16 or 32 bits) and from whether the IR op is BI_IMATH_ADD; every
+ * other imath op is encoded as a subtract. The packing itself is delegated
+ * to bi_pack_add_2src().
+ */
+static unsigned
+bi_pack_add_imath(bi_instruction *ins, bi_registers *regs)
+{
+        /* TODO: 32+16 add */
+        assert(ins->src_types[0] == ins->src_types[1]);
+
+        bool is_add = (ins->op.imath == BI_IMATH_ADD);
+        unsigned opcode = 0;
+
+        switch (nir_alu_type_get_type_size(ins->src_types[0])) {
+        case 8:
+                opcode = is_add ? BIFROST_ADD_IADD_8 : BIFROST_ADD_ISUB_8;
+                break;
+        case 16:
+                opcode = is_add ? BIFROST_ADD_IADD_16 : BIFROST_ADD_ISUB_16;
+                break;
+        case 32:
+                opcode = is_add ? BIFROST_ADD_IADD_32 : BIFROST_ADD_ISUB_32;
+                break;
+        default:
+                unreachable("64-bit todo");
+        }
+
+        return bi_pack_add_2src(ins, regs, opcode);
+}
+
+/**
+ * Pack a conditional branch for the ADD unit.
+ *
+ * Only the "source 0 equals zero" form is supported: the condition must be
+ * BI_COND_EQ and the second source must be BIR_INDEX_ZERO. The compared
+ * source must be 16 or 32 bits wide.
+ */
+static unsigned
+bi_pack_add_branch_cond(bi_instruction *ins, bi_registers *regs)
+{
+        assert(ins->cond == BI_COND_EQ);
+        assert(ins->src[1] == BIR_INDEX_ZERO);
+
+        unsigned bits = nir_alu_type_get_type_size(ins->src_types[0]);
+        unsigned zero_sel = 0;
+
+        switch (bits) {
+        case 16:
+                /* See BR_SIZE_ZERO swizzle disassembly */
+                zero_sel = ins->swizzle[0][0] ? 1 : 2;
+                break;
+        default:
+                assert(bits == 32);
+                break;
+        }
+
+        /* EQ swap to NE */
+        const bool port_swapped = false;
+
+        /* With a zero comparison the src1 field carries control bits rather
+         * than a source: the zero selector above bit 0, and bit 0 set when
+         * the ports are not swapped */
+        unsigned ctrl = (zero_sel << 1) | !port_swapped;
+
+        /* We assigned the constant port to fetch the branch offset so we can
+         * just passthrough here. We put in the HI slot to match the blob since
+         * that's where the magic flags end up */
+        struct bifrost_branch pack = {
+                .src0 = bi_get_src(ins, regs, 0),
+                .src1 = ctrl,
+                .src2 = BIFROST_SRC_CONST_HI,
+                .cond = BR_COND_EQ,
+                .size = BR_SIZE_ZERO,
+                .op = BIFROST_ADD_OP_BRANCH
+        };
+
+        RETURN_PACKED(pack);
+}
+
+/**
+ * Pack an unconditional branch for the ADD unit.
+ *
+ * Neither ins nor regs is consulted: every field is a fixed value.
+ */
+static unsigned
+bi_pack_add_branch_uncond(bi_instruction *ins, bi_registers *regs)
+{
+        /* BR_ALWAYS is split over two fields: its low three bits go into
+         * cond and the remaining bits into size. All ones in fact */
+        const unsigned always_cond = (BR_ALWAYS & 0x7);
+        const unsigned always_size = (BR_ALWAYS >> 3);
+
+        struct bifrost_branch pack = {
+                /* It's unclear what these bits actually mean */
+                .src0 = BIFROST_SRC_CONST_LO,
+                .src1 = BIFROST_SRC_PASS_FMA,
+
+                /* Branch offset, taken from the HI constant slot */
+                .src2 = BIFROST_SRC_CONST_HI,
+
+                .cond = always_cond,
+                .size = always_size,
+                .op = BIFROST_ADD_OP_BRANCH
+        };
+
+        RETURN_PACKED(pack);
+}
+
+/**
+ * Pack a branch for the ADD unit, dispatching on the instruction's
+ * condition: BI_COND_ALWAYS uses the unconditional encoding, anything else
+ * goes through the conditional packer.
+ */
+static unsigned
+bi_pack_add_branch(bi_instruction *ins, bi_registers *regs)
+{
+        bool unconditional = (ins->cond == BI_COND_ALWAYS);
+
+        return unconditional ? bi_pack_add_branch_uncond(ins, regs)
+                             : bi_pack_add_branch_cond(ins, regs);
+}
+
+static unsigned
+bi_pack_add(bi_clause *clause, bi_bundle bundle, bi_registers *regs, gl_shader_stage stage)