namespace aco {
namespace {
+#define isel_err(...) _isel_err(ctx, __FILE__, __LINE__, __VA_ARGS__)
+
+static void _isel_err(isel_context *ctx, const char *file, unsigned line,
+ const nir_instr *instr, const char *msg)
+{
+ char *out;
+ size_t outsize;
+ FILE *memf = open_memstream(&out, &outsize);
+
+ fprintf(memf, "%s: ", msg);
+ nir_print_instr(instr, memf);
+ fclose(memf);
+
+ _aco_err(ctx->program, file, line, out);
+ free(out);
+}
+
class loop_info_RAII {
isel_context* ctx;
unsigned header_idx_old;
return emit_wqm(ctx, tmp, dst);
}
+Temp convert_int(isel_context *ctx, Builder& bld, Temp src, unsigned src_bits, unsigned dst_bits, bool is_signed, Temp dst=Temp())
+{
+ if (!dst.id()) {
+ if (dst_bits % 32 == 0 || src.type() == RegType::sgpr)
+ dst = bld.tmp(src.type(), DIV_ROUND_UP(dst_bits, 32u));
+ else
+ dst = bld.tmp(RegClass(RegType::vgpr, dst_bits / 8u).as_subdword());
+ }
+
+ if (dst.bytes() == src.bytes() && dst_bits < src_bits)
+ return bld.copy(Definition(dst), src);
+ else if (dst.bytes() < src.bytes())
+ return bld.pseudo(aco_opcode::p_extract_vector, Definition(dst), src, Operand(0u));
+
+ Temp tmp = dst;
+ if (dst_bits == 64)
+ tmp = src_bits == 32 ? src : bld.tmp(src.type(), 1);
+
+ if (tmp == src) {
+ } else if (src.regClass() == s1) {
+ if (is_signed)
+ bld.sop1(src_bits == 8 ? aco_opcode::s_sext_i32_i8 : aco_opcode::s_sext_i32_i16, Definition(tmp), src);
+ else
+ bld.sop2(aco_opcode::s_and_b32, Definition(tmp), bld.def(s1, scc), Operand(src_bits == 8 ? 0xFFu : 0xFFFFu), src);
+ } else if (ctx->options->chip_class >= GFX8) {
+ assert(src_bits != 8 || src.regClass() == v1b);
+ assert(src_bits != 16 || src.regClass() == v2b);
+ aco_ptr<SDWA_instruction> sdwa{create_instruction<SDWA_instruction>(aco_opcode::v_mov_b32, asSDWA(Format::VOP1), 1, 1)};
+ sdwa->operands[0] = Operand(src);
+ sdwa->definitions[0] = Definition(tmp);
+ if (is_signed)
+ sdwa->sel[0] = src_bits == 8 ? sdwa_sbyte : sdwa_sword;
+ else
+ sdwa->sel[0] = src_bits == 8 ? sdwa_ubyte : sdwa_uword;
+ sdwa->dst_sel = tmp.bytes() == 2 ? sdwa_uword : sdwa_udword;
+ bld.insert(std::move(sdwa));
+ } else {
+ assert(ctx->options->chip_class == GFX6 || ctx->options->chip_class == GFX7);
+ aco_opcode opcode = is_signed ? aco_opcode::v_bfe_i32 : aco_opcode::v_bfe_u32;
+ bld.vop3(opcode, Definition(tmp), src, Operand(0u), Operand(src_bits == 8 ? 8u : 16u));
+ }
+
+ if (dst_bits == 64) {
+ if (is_signed && dst.regClass() == s2) {
+ Temp high = bld.sop2(aco_opcode::s_ashr_i32, bld.def(s1), bld.def(s1, scc), tmp, Operand(31u));
+ bld.pseudo(aco_opcode::p_create_vector, Definition(dst), tmp, high);
+ } else if (is_signed && dst.regClass() == v2) {
+ Temp high = bld.vop2(aco_opcode::v_ashrrev_i32, bld.def(v1), Operand(31u), tmp);
+ bld.pseudo(aco_opcode::p_create_vector, Definition(dst), tmp, high);
+ } else {
+ bld.pseudo(aco_opcode::p_create_vector, Definition(dst), tmp, Operand(0u));
+ }
+ }
+
+ return dst;
+}
+
+enum sgpr_extract_mode {
+ sgpr_extract_sext,
+ sgpr_extract_zext,
+ sgpr_extract_undef,
+};
+
+Temp extract_8_16_bit_sgpr_element(isel_context *ctx, Temp dst, nir_alu_src *src, sgpr_extract_mode mode)
+{
+ Temp vec = get_ssa_temp(ctx, src->src.ssa);
+ unsigned src_size = src->src.ssa->bit_size;
+ unsigned swizzle = src->swizzle[0];
+
+ if (vec.size() > 1) {
+ assert(src_size == 16);
+ vec = emit_extract_vector(ctx, vec, swizzle / 2, s1);
+ swizzle = swizzle & 1;
+ }
+
+ Builder bld(ctx->program, ctx->block);
+ unsigned offset = src_size * swizzle;
+ Temp tmp = dst.regClass() == s2 ? bld.tmp(s1) : dst;
+
+ if (mode == sgpr_extract_undef && swizzle == 0) {
+ bld.copy(Definition(tmp), vec);
+ } else if (mode == sgpr_extract_undef || (offset == 24 && mode == sgpr_extract_zext)) {
+ bld.sop2(aco_opcode::s_lshr_b32, Definition(tmp), bld.def(s1, scc), vec, Operand(offset));
+ } else if (src_size == 8 && swizzle == 0 && mode == sgpr_extract_sext) {
+ bld.sop1(aco_opcode::s_sext_i32_i8, Definition(tmp), vec);
+ } else if (src_size == 16 && swizzle == 0 && mode == sgpr_extract_sext) {
+ bld.sop1(aco_opcode::s_sext_i32_i16, Definition(tmp), vec);
+ } else {
+ aco_opcode op = mode == sgpr_extract_zext ? aco_opcode::s_bfe_u32 : aco_opcode::s_bfe_i32;
+ bld.sop2(op, Definition(tmp), bld.def(s1, scc), vec, Operand((src_size << 16) | offset));
+ }
+
+ if (dst.regClass() == s2)
+ convert_int(ctx, bld, tmp, 32, 64, mode == sgpr_extract_sext, dst);
+
+ return dst;
+}
+
Temp get_alu_src(struct isel_context *ctx, nir_alu_src src, unsigned size=1)
{
if (src.src.ssa->num_components == 1 && src.swizzle[0] == 0 && size == 1)
if (elem_size < 4 && vec.type() == RegType::sgpr) {
assert(src.src.ssa->bit_size == 8 || src.src.ssa->bit_size == 16);
assert(size == 1);
- unsigned swizzle = src.swizzle[0];
- if (vec.size() > 1) {
- assert(src.src.ssa->bit_size == 16);
- vec = emit_extract_vector(ctx, vec, swizzle / 2, s1);
- swizzle = swizzle & 1;
- }
- if (swizzle == 0)
- return vec;
-
- Temp dst{ctx->program->allocateId(), s1};
- aco_ptr<SOP2_instruction> bfe{create_instruction<SOP2_instruction>(aco_opcode::s_bfe_u32, Format::SOP2, 2, 2)};
- bfe->operands[0] = Operand(vec);
- bfe->operands[1] = Operand(uint32_t((src.src.ssa->bit_size << 16) | (src.src.ssa->bit_size * swizzle)));
- bfe->definitions[0] = Definition(dst);
- bfe->definitions[1] = Definition(ctx->program->allocateId(), scc, s1);
- ctx->block->instructions.emplace_back(std::move(bfe));
- return dst;
+ return extract_8_16_bit_sgpr_element(
+ ctx, Temp(ctx->program->allocateId(), s1), &src, sgpr_extract_undef);
}
RegClass elem_rc = elem_size < 4 ? RegClass(vec.type(), elem_size).as_subdword() : RegClass(vec.type(), elem_size / 4);
bld.pseudo(aco_opcode::p_create_vector, Definition(dst), dst0, dst1);
} else {
- fprintf(stderr, "Unimplemented NIR instr bit size: ");
- nir_print_instr(&instr->instr, stderr);
- fprintf(stderr, "\n");
+ isel_err(&instr->instr, "Unimplemented NIR instr bit size");
}
return;
}
aco_opcode op = dst.regClass() == s1 ? aco_opcode::s_cselect_b32 : aco_opcode::s_cselect_b64;
bld.sop2(op, Definition(dst), then, els, bld.scc(bool_to_scalar_condition(ctx, cond)));
} else {
- fprintf(stderr, "Unimplemented uniform bcsel bit size: ");
- nir_print_instr(&instr->instr, stderr);
- fprintf(stderr, "\n");
+ isel_err(&instr->instr, "Unimplemented uniform bcsel bit size");
}
return;
}
return add->definitions[0].getTemp();
}
-Temp convert_int(isel_context *ctx, Builder& bld, Temp src, unsigned src_bits, unsigned dst_bits, bool is_signed, Temp dst=Temp()) {
- if (!dst.id()) {
- if (dst_bits % 32 == 0 || src.type() == RegType::sgpr)
- dst = bld.tmp(src.type(), DIV_ROUND_UP(dst_bits, 32u));
- else
- dst = bld.tmp(RegClass(RegType::vgpr, dst_bits / 8u).as_subdword());
- }
-
- if (dst.bytes() == src.bytes() && dst_bits < src_bits)
- return bld.copy(Definition(dst), src);
- else if (dst.bytes() < src.bytes())
- return bld.pseudo(aco_opcode::p_extract_vector, Definition(dst), src, Operand(0u));
-
- Temp tmp = dst;
- if (dst_bits == 64)
- tmp = src_bits == 32 ? src : bld.tmp(src.type(), 1);
-
- if (tmp == src) {
- } else if (src.regClass() == s1) {
- if (is_signed)
- bld.sop1(src_bits == 8 ? aco_opcode::s_sext_i32_i8 : aco_opcode::s_sext_i32_i16, Definition(tmp), src);
- else
- bld.sop2(aco_opcode::s_and_b32, Definition(tmp), bld.def(s1, scc), Operand(src_bits == 8 ? 0xFFu : 0xFFFFu), src);
- } else if (ctx->options->chip_class >= GFX8) {
- assert(src_bits != 8 || src.regClass() == v1b);
- assert(src_bits != 16 || src.regClass() == v2b);
- aco_ptr<SDWA_instruction> sdwa{create_instruction<SDWA_instruction>(aco_opcode::v_mov_b32, asSDWA(Format::VOP1), 1, 1)};
- sdwa->operands[0] = Operand(src);
- sdwa->definitions[0] = Definition(tmp);
- if (is_signed)
- sdwa->sel[0] = src_bits == 8 ? sdwa_sbyte : sdwa_sword;
- else
- sdwa->sel[0] = src_bits == 8 ? sdwa_ubyte : sdwa_uword;
- sdwa->dst_sel = tmp.bytes() == 2 ? sdwa_uword : sdwa_udword;
- bld.insert(std::move(sdwa));
- } else {
- assert(ctx->options->chip_class == GFX6 || ctx->options->chip_class == GFX7);
- aco_opcode opcode = is_signed ? aco_opcode::v_bfe_i32 : aco_opcode::v_bfe_u32;
- bld.vop3(opcode, Definition(tmp), src, Operand(0u), Operand(src_bits == 8 ? 8u : 16u));
- }
-
- if (dst_bits == 64) {
- if (is_signed && dst.regClass() == s2) {
- Temp high = bld.sop2(aco_opcode::s_ashr_i32, bld.def(s1), bld.def(s1, scc), tmp, Operand(31u));
- bld.pseudo(aco_opcode::p_create_vector, Definition(dst), tmp, high);
- } else if (is_signed && dst.regClass() == v2) {
- Temp high = bld.vop2(aco_opcode::v_ashrrev_i32, bld.def(v1), Operand(31u), tmp);
- bld.pseudo(aco_opcode::p_create_vector, Definition(dst), tmp, high);
- } else {
- bld.pseudo(aco_opcode::p_create_vector, Definition(dst), tmp, Operand(0u));
- }
- }
-
- return dst;
-}
-
void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
{
if (!instr->dest.dest.is_ssa) {
- fprintf(stderr, "nir alu dst not in ssa: ");
- nir_print_instr(&instr->instr, stderr);
- fprintf(stderr, "\n");
+ isel_err(&instr->instr, "nir alu dst not in ssa");
abort();
}
Builder bld(ctx->program, ctx->block);
aco_opcode opcode = dst.size() == 1 ? aco_opcode::s_not_b32 : aco_opcode::s_not_b64;
bld.sop1(opcode, Definition(dst), bld.def(s1, scc), src);
} else {
- fprintf(stderr, "Unimplemented NIR instr bit size: ");
- nir_print_instr(&instr->instr, stderr);
- fprintf(stderr, "\n");
+ isel_err(&instr->instr, "Unimplemented NIR instr bit size");
}
break;
}
bld.pseudo(aco_opcode::p_create_vector, Definition(dst), lower, upper);
}
} else {
- fprintf(stderr, "Unimplemented NIR instr bit size: ");
- nir_print_instr(&instr->instr, stderr);
- fprintf(stderr, "\n");
+ isel_err(&instr->instr, "Unimplemented NIR instr bit size");
}
break;
}
Temp src = get_alu_src(ctx, instr->src[0]);
bld.vop2(aco_opcode::v_max_i32, Definition(dst), src, bld.vsub32(bld.def(v1), Operand(0u), src));
} else {
- fprintf(stderr, "Unimplemented NIR instr bit size: ");
- nir_print_instr(&instr->instr, stderr);
- fprintf(stderr, "\n");
+ isel_err(&instr->instr, "Unimplemented NIR instr bit size");
}
break;
}
upper = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), Operand(0u), neg, gtz);
bld.pseudo(aco_opcode::p_create_vector, Definition(dst), lower, upper);
} else {
- fprintf(stderr, "Unimplemented NIR instr bit size: ");
- nir_print_instr(&instr->instr, stderr);
- fprintf(stderr, "\n");
+ isel_err(&instr->instr, "Unimplemented NIR instr bit size");
}
break;
}
} else if (dst.regClass() == s1) {
emit_sop2_instruction(ctx, instr, aco_opcode::s_max_i32, dst, true);
} else {
- fprintf(stderr, "Unimplemented NIR instr bit size: ");
- nir_print_instr(&instr->instr, stderr);
- fprintf(stderr, "\n");
+ isel_err(&instr->instr, "Unimplemented NIR instr bit size");
}
break;
}
} else if (dst.regClass() == s1) {
emit_sop2_instruction(ctx, instr, aco_opcode::s_max_u32, dst, true);
} else {
- fprintf(stderr, "Unimplemented NIR instr bit size: ");
- nir_print_instr(&instr->instr, stderr);
- fprintf(stderr, "\n");
+ isel_err(&instr->instr, "Unimplemented NIR instr bit size");
}
break;
}
} else if (dst.regClass() == s1) {
emit_sop2_instruction(ctx, instr, aco_opcode::s_min_i32, dst, true);
} else {
- fprintf(stderr, "Unimplemented NIR instr bit size: ");
- nir_print_instr(&instr->instr, stderr);
- fprintf(stderr, "\n");
+ isel_err(&instr->instr, "Unimplemented NIR instr bit size");
}
break;
}
} else if (dst.regClass() == s1) {
emit_sop2_instruction(ctx, instr, aco_opcode::s_min_u32, dst, true);
} else {
- fprintf(stderr, "Unimplemented NIR instr bit size: ");
- nir_print_instr(&instr->instr, stderr);
- fprintf(stderr, "\n");
+ isel_err(&instr->instr, "Unimplemented NIR instr bit size");
}
break;
}
} else if (dst.regClass() == s2) {
emit_sop2_instruction(ctx, instr, aco_opcode::s_or_b64, dst, true);
} else {
- fprintf(stderr, "Unimplemented NIR instr bit size: ");
- nir_print_instr(&instr->instr, stderr);
- fprintf(stderr, "\n");
+ isel_err(&instr->instr, "Unimplemented NIR instr bit size");
}
break;
}
} else if (dst.regClass() == s2) {
emit_sop2_instruction(ctx, instr, aco_opcode::s_and_b64, dst, true);
} else {
- fprintf(stderr, "Unimplemented NIR instr bit size: ");
- nir_print_instr(&instr->instr, stderr);
- fprintf(stderr, "\n");
+ isel_err(&instr->instr, "Unimplemented NIR instr bit size");
}
break;
}
} else if (dst.regClass() == s2) {
emit_sop2_instruction(ctx, instr, aco_opcode::s_xor_b64, dst, true);
} else {
- fprintf(stderr, "Unimplemented NIR instr bit size: ");
- nir_print_instr(&instr->instr, stderr);
- fprintf(stderr, "\n");
+ isel_err(&instr->instr, "Unimplemented NIR instr bit size");
}
break;
}
} else if (dst.regClass() == s1) {
emit_sop2_instruction(ctx, instr, aco_opcode::s_lshr_b32, dst, true);
} else {
- fprintf(stderr, "Unimplemented NIR instr bit size: ");
- nir_print_instr(&instr->instr, stderr);
- fprintf(stderr, "\n");
+ isel_err(&instr->instr, "Unimplemented NIR instr bit size");
}
break;
}
} else if (dst.regClass() == s2) {
emit_sop2_instruction(ctx, instr, aco_opcode::s_lshl_b64, dst, true);
} else {
- fprintf(stderr, "Unimplemented NIR instr bit size: ");
- nir_print_instr(&instr->instr, stderr);
- fprintf(stderr, "\n");
+ isel_err(&instr->instr, "Unimplemented NIR instr bit size");
}
break;
}
} else if (dst.regClass() == s2) {
emit_sop2_instruction(ctx, instr, aco_opcode::s_ashr_i64, dst, true);
} else {
- fprintf(stderr, "Unimplemented NIR instr bit size: ");
- nir_print_instr(&instr->instr, stderr);
- fprintf(stderr, "\n");
+ isel_err(&instr->instr, "Unimplemented NIR instr bit size");
}
break;
}
} else if (src.regClass() == s2) {
bld.sop1(aco_opcode::s_ff1_i32_b64, Definition(dst), src);
} else {
- fprintf(stderr, "Unimplemented NIR instr bit size: ");
- nir_print_instr(&instr->instr, stderr);
- fprintf(stderr, "\n");
+ isel_err(&instr->instr, "Unimplemented NIR instr bit size");
}
break;
}
Temp carry = bld.vsub32(Definition(msb), Operand(31u), Operand(msb_rev), true).def(1).getTemp();
bld.vop2_e64(aco_opcode::v_cndmask_b32, Definition(dst), msb, Operand((uint32_t)-1), carry);
} else {
- fprintf(stderr, "Unimplemented NIR instr bit size: ");
- nir_print_instr(&instr->instr, stderr);
- fprintf(stderr, "\n");
+ isel_err(&instr->instr, "Unimplemented NIR instr bit size");
}
break;
}
} else if (dst.regClass() == v1) {
bld.vop1(aco_opcode::v_bfrev_b32, Definition(dst), get_alu_src(ctx, instr->src[0]));
} else {
- fprintf(stderr, "Unimplemented NIR instr bit size: ");
- nir_print_instr(&instr->instr, stderr);
- fprintf(stderr, "\n");
+ isel_err(&instr->instr, "Unimplemented NIR instr bit size");
}
break;
}
Temp dst1 = bld.vadd32(bld.def(v1), src01, src11, false, carry);
bld.pseudo(aco_opcode::p_create_vector, Definition(dst), dst0, dst1);
} else {
- fprintf(stderr, "Unimplemented NIR instr bit size: ");
- nir_print_instr(&instr->instr, stderr);
- fprintf(stderr, "\n");
+ isel_err(&instr->instr, "Unimplemented NIR instr bit size");
}
break;
}
bld.vop2_e64(aco_opcode::v_cndmask_b32, Definition(dst), tmp, Operand((uint32_t) -1), carry);
}
} else {
- fprintf(stderr, "Unimplemented NIR instr bit size: ");
- nir_print_instr(&instr->instr, stderr);
- fprintf(stderr, "\n");
+ isel_err(&instr->instr, "Unimplemented NIR instr bit size");
}
break;
}
carry = bld.vop2_e64(aco_opcode::v_cndmask_b32, bld.def(v1), Operand(0u), Operand(1u), carry);
bld.pseudo(aco_opcode::p_create_vector, Definition(dst), carry, Operand(0u));
} else {
- fprintf(stderr, "Unimplemented NIR instr bit size: ");
- nir_print_instr(&instr->instr, stderr);
- fprintf(stderr, "\n");
+ isel_err(&instr->instr, "Unimplemented NIR instr bit size");
}
break;
}
Temp upper = bld.vsub32(bld.def(v1), src01, src11, false, borrow);
bld.pseudo(aco_opcode::p_create_vector, Definition(dst), lower, upper);
} else {
- fprintf(stderr, "Unimplemented NIR instr bit size: ");
- nir_print_instr(&instr->instr, stderr);
- fprintf(stderr, "\n");
+ isel_err(&instr->instr, "Unimplemented NIR instr bit size");
}
break;
}
borrow = bld.vop2_e64(aco_opcode::v_cndmask_b32, bld.def(v1), Operand(0u), Operand(1u), borrow);
bld.pseudo(aco_opcode::p_create_vector, Definition(dst), borrow, Operand(0u));
} else {
- fprintf(stderr, "Unimplemented NIR instr bit size: ");
- nir_print_instr(&instr->instr, stderr);
- fprintf(stderr, "\n");
+ isel_err(&instr->instr, "Unimplemented NIR instr bit size");
}
break;
}
} else if (dst.regClass() == s1) {
emit_sop2_instruction(ctx, instr, aco_opcode::s_mul_i32, dst, false);
} else {
- fprintf(stderr, "Unimplemented NIR instr bit size: ");
- nir_print_instr(&instr->instr, stderr);
- fprintf(stderr, "\n");
+ isel_err(&instr->instr, "Unimplemented NIR instr bit size");
}
break;
}
as_vgpr(ctx, get_alu_src(ctx, instr->src[1])));
bld.pseudo(aco_opcode::p_as_uniform, Definition(dst), tmp);
} else {
- fprintf(stderr, "Unimplemented NIR instr bit size: ");
- nir_print_instr(&instr->instr, stderr);
- fprintf(stderr, "\n");
+ isel_err(&instr->instr, "Unimplemented NIR instr bit size");
}
break;
}
as_vgpr(ctx, get_alu_src(ctx, instr->src[1])));
bld.pseudo(aco_opcode::p_as_uniform, Definition(dst), tmp);
} else {
- fprintf(stderr, "Unimplemented NIR instr bit size: ");
- nir_print_instr(&instr->instr, stderr);
- fprintf(stderr, "\n");
+ isel_err(&instr->instr, "Unimplemented NIR instr bit size");
}
break;
}
} else if (dst.regClass() == v2) {
bld.vop3(aco_opcode::v_mul_f64, Definition(dst), src0, src1);
} else {
- fprintf(stderr, "Unimplemented NIR instr bit size: ");
- nir_print_instr(&instr->instr, stderr);
- fprintf(stderr, "\n");
+ isel_err(&instr->instr, "Unimplemented NIR instr bit size");
}
break;
}
} else if (dst.regClass() == v2) {
bld.vop3(aco_opcode::v_add_f64, Definition(dst), src0, src1);
} else {
- fprintf(stderr, "Unimplemented NIR instr bit size: ");
- nir_print_instr(&instr->instr, stderr);
- fprintf(stderr, "\n");
+ isel_err(&instr->instr, "Unimplemented NIR instr bit size");
}
break;
}
VOP3A_instruction* sub = static_cast<VOP3A_instruction*>(add);
sub->neg[1] = true;
} else {
- fprintf(stderr, "Unimplemented NIR instr bit size: ");
- nir_print_instr(&instr->instr, stderr);
- fprintf(stderr, "\n");
+ isel_err(&instr->instr, "Unimplemented NIR instr bit size");
}
break;
}
bld.vop3(aco_opcode::v_max_f64, Definition(dst), src0, src1);
}
} else {
- fprintf(stderr, "Unimplemented NIR instr bit size: ");
- nir_print_instr(&instr->instr, stderr);
- fprintf(stderr, "\n");
+ isel_err(&instr->instr, "Unimplemented NIR instr bit size");
}
break;
}
bld.vop3(aco_opcode::v_min_f64, Definition(dst), src0, src1);
}
} else {
- fprintf(stderr, "Unimplemented NIR instr bit size: ");
- nir_print_instr(&instr->instr, stderr);
- fprintf(stderr, "\n");
- }
- break;
- }
- case nir_op_fmax3: {
- if (dst.regClass() == v2b) {
- emit_vop3a_instruction(ctx, instr, aco_opcode::v_max3_f16, dst, false);
- } else if (dst.regClass() == v1) {
- emit_vop3a_instruction(ctx, instr, aco_opcode::v_max3_f32, dst, ctx->block->fp_mode.must_flush_denorms32);
- } else {
- fprintf(stderr, "Unimplemented NIR instr bit size: ");
- nir_print_instr(&instr->instr, stderr);
- fprintf(stderr, "\n");
- }
- break;
- }
- case nir_op_fmin3: {
- if (dst.regClass() == v2b) {
- emit_vop3a_instruction(ctx, instr, aco_opcode::v_min3_f16, dst, false);
- } else if (dst.regClass() == v1) {
- emit_vop3a_instruction(ctx, instr, aco_opcode::v_min3_f32, dst, ctx->block->fp_mode.must_flush_denorms32);
- } else {
- fprintf(stderr, "Unimplemented NIR instr bit size: ");
- nir_print_instr(&instr->instr, stderr);
- fprintf(stderr, "\n");
- }
- break;
- }
- case nir_op_fmed3: {
- if (dst.regClass() == v2b) {
- emit_vop3a_instruction(ctx, instr, aco_opcode::v_med3_f16, dst, false);
- } else if (dst.regClass() == v1) {
- emit_vop3a_instruction(ctx, instr, aco_opcode::v_med3_f32, dst, ctx->block->fp_mode.must_flush_denorms32);
- } else {
- fprintf(stderr, "Unimplemented NIR instr bit size: ");
- nir_print_instr(&instr->instr, stderr);
- fprintf(stderr, "\n");
- }
- break;
- }
- case nir_op_umax3: {
- if (dst.size() == 1) {
- emit_vop3a_instruction(ctx, instr, aco_opcode::v_max3_u32, dst);
- } else {
- fprintf(stderr, "Unimplemented NIR instr bit size: ");
- nir_print_instr(&instr->instr, stderr);
- fprintf(stderr, "\n");
- }
- break;
- }
- case nir_op_umin3: {
- if (dst.size() == 1) {
- emit_vop3a_instruction(ctx, instr, aco_opcode::v_min3_u32, dst);
- } else {
- fprintf(stderr, "Unimplemented NIR instr bit size: ");
- nir_print_instr(&instr->instr, stderr);
- fprintf(stderr, "\n");
- }
- break;
- }
- case nir_op_umed3: {
- if (dst.size() == 1) {
- emit_vop3a_instruction(ctx, instr, aco_opcode::v_med3_u32, dst);
- } else {
- fprintf(stderr, "Unimplemented NIR instr bit size: ");
- nir_print_instr(&instr->instr, stderr);
- fprintf(stderr, "\n");
- }
- break;
- }
- case nir_op_imax3: {
- if (dst.size() == 1) {
- emit_vop3a_instruction(ctx, instr, aco_opcode::v_max3_i32, dst);
- } else {
- fprintf(stderr, "Unimplemented NIR instr bit size: ");
- nir_print_instr(&instr->instr, stderr);
- fprintf(stderr, "\n");
- }
- break;
- }
- case nir_op_imin3: {
- if (dst.size() == 1) {
- emit_vop3a_instruction(ctx, instr, aco_opcode::v_min3_i32, dst);
- } else {
- fprintf(stderr, "Unimplemented NIR instr bit size: ");
- nir_print_instr(&instr->instr, stderr);
- fprintf(stderr, "\n");
- }
- break;
- }
- case nir_op_imed3: {
- if (dst.size() == 1) {
- emit_vop3a_instruction(ctx, instr, aco_opcode::v_med3_i32, dst);
- } else {
- fprintf(stderr, "Unimplemented NIR instr bit size: ");
- nir_print_instr(&instr->instr, stderr);
- fprintf(stderr, "\n");
+ isel_err(&instr->instr, "Unimplemented NIR instr bit size");
}
break;
}
ma = bld.vop1(aco_opcode::v_rcp_f32, bld.def(v1), ma);
Temp sc = bld.vop3(aco_opcode::v_cubesc_f32, bld.def(v1), src[0], src[1], src[2]);
Temp tc = bld.vop3(aco_opcode::v_cubetc_f32, bld.def(v1), src[0], src[1], src[2]);
- sc = bld.vop2(aco_opcode::v_madak_f32, bld.def(v1), sc, ma, Operand(0x3f000000u/*0.5*/));
- tc = bld.vop2(aco_opcode::v_madak_f32, bld.def(v1), tc, ma, Operand(0x3f000000u/*0.5*/));
+ sc = bld.vop2(aco_opcode::v_add_f32, bld.def(v1),
+ bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), sc, ma), Operand(0x3f000000u/*0.5*/));
+ tc = bld.vop2(aco_opcode::v_add_f32, bld.def(v1),
+ bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), tc, ma), Operand(0x3f000000u/*0.5*/));
bld.pseudo(aco_opcode::p_create_vector, Definition(dst), sc, tc);
break;
}
/* Lowered at NIR level for precision reasons. */
emit_vop1_instruction(ctx, instr, aco_opcode::v_rsq_f64, dst);
} else {
- fprintf(stderr, "Unimplemented NIR instr bit size: ");
- nir_print_instr(&instr->instr, stderr);
- fprintf(stderr, "\n");
+ isel_err(&instr->instr, "Unimplemented NIR instr bit size");
}
break;
}
upper = bld.vop2(aco_opcode::v_xor_b32, bld.def(v1), Operand(0x80000000u), upper);
bld.pseudo(aco_opcode::p_create_vector, Definition(dst), lower, upper);
} else {
- fprintf(stderr, "Unimplemented NIR instr bit size: ");
- nir_print_instr(&instr->instr, stderr);
- fprintf(stderr, "\n");
+ isel_err(&instr->instr, "Unimplemented NIR instr bit size");
}
break;
}
upper = bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand(0x7FFFFFFFu), upper);
bld.pseudo(aco_opcode::p_create_vector, Definition(dst), lower, upper);
} else {
- fprintf(stderr, "Unimplemented NIR instr bit size: ");
- nir_print_instr(&instr->instr, stderr);
- fprintf(stderr, "\n");
+ isel_err(&instr->instr, "Unimplemented NIR instr bit size");
}
break;
}
VOP3A_instruction* vop3 = static_cast<VOP3A_instruction*>(add);
vop3->clamp = true;
} else {
- fprintf(stderr, "Unimplemented NIR instr bit size: ");
- nir_print_instr(&instr->instr, stderr);
- fprintf(stderr, "\n");
+ isel_err(&instr->instr, "Unimplemented NIR instr bit size");
}
break;
}
} else if (dst.regClass() == v1) {
emit_log2(ctx, bld, Definition(dst), src);
} else {
- fprintf(stderr, "Unimplemented NIR instr bit size: ");
- nir_print_instr(&instr->instr, stderr);
- fprintf(stderr, "\n");
+ isel_err(&instr->instr, "Unimplemented NIR instr bit size");
}
break;
}
/* Lowered at NIR level for precision reasons. */
emit_vop1_instruction(ctx, instr, aco_opcode::v_rcp_f64, dst);
} else {
- fprintf(stderr, "Unimplemented NIR instr bit size: ");
- nir_print_instr(&instr->instr, stderr);
- fprintf(stderr, "\n");
+ isel_err(&instr->instr, "Unimplemented NIR instr bit size");
}
break;
}
} else if (dst.regClass() == v1) {
emit_vop1_instruction(ctx, instr, aco_opcode::v_exp_f32, dst);
} else {
- fprintf(stderr, "Unimplemented NIR instr bit size: ");
- nir_print_instr(&instr->instr, stderr);
- fprintf(stderr, "\n");
+ isel_err(&instr->instr, "Unimplemented NIR instr bit size");
}
break;
}
/* Lowered at NIR level for precision reasons. */
emit_vop1_instruction(ctx, instr, aco_opcode::v_sqrt_f64, dst);
} else {
- fprintf(stderr, "Unimplemented NIR instr bit size: ");
- nir_print_instr(&instr->instr, stderr);
- fprintf(stderr, "\n");
+ isel_err(&instr->instr, "Unimplemented NIR instr bit size");
}
break;
}
} else if (dst.regClass() == v2) {
emit_vop1_instruction(ctx, instr, aco_opcode::v_fract_f64, dst);
} else {
- fprintf(stderr, "Unimplemented NIR instr bit size: ");
- nir_print_instr(&instr->instr, stderr);
- fprintf(stderr, "\n");
+ isel_err(&instr->instr, "Unimplemented NIR instr bit size");
}
break;
}
} else if (dst.regClass() == v2) {
emit_floor_f64(ctx, bld, Definition(dst), src);
} else {
- fprintf(stderr, "Unimplemented NIR instr bit size: ");
- nir_print_instr(&instr->instr, stderr);
- fprintf(stderr, "\n");
+ isel_err(&instr->instr, "Unimplemented NIR instr bit size");
}
break;
}
bld.vop3(aco_opcode::v_add_f64, Definition(dst), trunc, add);
}
} else {
- fprintf(stderr, "Unimplemented NIR instr bit size: ");
- nir_print_instr(&instr->instr, stderr);
- fprintf(stderr, "\n");
+ isel_err(&instr->instr, "Unimplemented NIR instr bit size");
}
break;
}
} else if (dst.regClass() == v2) {
emit_trunc_f64(ctx, bld, Definition(dst), src);
} else {
- fprintf(stderr, "Unimplemented NIR instr bit size: ");
- nir_print_instr(&instr->instr, stderr);
- fprintf(stderr, "\n");
+ isel_err(&instr->instr, "Unimplemented NIR instr bit size");
}
break;
}
bld.pseudo(aco_opcode::p_create_vector, Definition(dst), dst0, dst1);
}
} else {
- fprintf(stderr, "Unimplemented NIR instr bit size: ");
- nir_print_instr(&instr->instr, stderr);
- fprintf(stderr, "\n");
+ isel_err(&instr->instr, "Unimplemented NIR instr bit size");
}
break;
}
aco_opcode opcode = instr->op == nir_op_fsin ? aco_opcode::v_sin_f32 : aco_opcode::v_cos_f32;
bld.vop1(opcode, Definition(dst), tmp);
} else {
- fprintf(stderr, "Unimplemented NIR instr bit size: ");
- nir_print_instr(&instr->instr, stderr);
- fprintf(stderr, "\n");
+ isel_err(&instr->instr, "Unimplemented NIR instr bit size");
}
break;
}
} else if (dst.regClass() == v2) {
bld.vop3(aco_opcode::v_ldexp_f64, Definition(dst), as_vgpr(ctx, src0), src1);
} else {
- fprintf(stderr, "Unimplemented NIR instr bit size: ");
- nir_print_instr(&instr->instr, stderr);
- fprintf(stderr, "\n");
+ isel_err(&instr->instr, "Unimplemented NIR instr bit size");
}
break;
}
} else if (dst.regClass() == v2) {
bld.vop1(aco_opcode::v_frexp_mant_f64, Definition(dst), src);
} else {
- fprintf(stderr, "Unimplemented NIR instr bit size: ");
- nir_print_instr(&instr->instr, stderr);
- fprintf(stderr, "\n");
+ isel_err(&instr->instr, "Unimplemented NIR instr bit size");
}
break;
}
} else if (instr->src[0].src.ssa->bit_size == 64) {
bld.vop1(aco_opcode::v_frexp_exp_i32_f64, Definition(dst), src);
} else {
- fprintf(stderr, "Unimplemented NIR instr bit size: ");
- nir_print_instr(&instr->instr, stderr);
- fprintf(stderr, "\n");
+ isel_err(&instr->instr, "Unimplemented NIR instr bit size");
}
break;
}
bld.pseudo(aco_opcode::p_create_vector, Definition(dst), Operand(0u), upper);
} else {
- fprintf(stderr, "Unimplemented NIR instr bit size: ");
- nir_print_instr(&instr->instr, stderr);
- fprintf(stderr, "\n");
+ isel_err(&instr->instr, "Unimplemented NIR instr bit size");
}
break;
}
} else if (instr->src[0].src.ssa->bit_size == 64) {
emit_vop1_instruction(ctx, instr, aco_opcode::v_cvt_f32_f64, dst);
} else {
- fprintf(stderr, "Unimplemented NIR instr bit size: ");
- nir_print_instr(&instr->instr, stderr);
- fprintf(stderr, "\n");
+ isel_err(&instr->instr, "Unimplemented NIR instr bit size");
}
break;
}
bld.vop3(aco_opcode::v_add_f64, Definition(dst), lower, upper);
} else {
- fprintf(stderr, "Unimplemented NIR instr bit size: ");
- nir_print_instr(&instr->instr, stderr);
- fprintf(stderr, "\n");
+ isel_err(&instr->instr, "Unimplemented NIR instr bit size");
}
break;
}
upper = bld.vop3(aco_opcode::v_ldexp_f64, bld.def(v2), upper, Operand(32u));
bld.vop3(aco_opcode::v_add_f64, Definition(dst), lower, upper);
} else {
- fprintf(stderr, "Unimplemented NIR instr bit size: ");
- nir_print_instr(&instr->instr, stderr);
- fprintf(stderr, "\n");
+ isel_err(&instr->instr, "Unimplemented NIR instr bit size");
}
break;
}
} else if (instr->src[0].src.ssa->bit_size == 64) {
emit_vop1_instruction(ctx, instr, aco_opcode::v_cvt_i32_f64, dst);
} else {
- fprintf(stderr, "Unimplemented NIR instr bit size: ");
- nir_print_instr(&instr->instr, stderr);
- fprintf(stderr, "\n");
+ isel_err(&instr->instr, "Unimplemented NIR instr bit size");
}
break;
}
} else if (instr->src[0].src.ssa->bit_size == 64) {
emit_vop1_instruction(ctx, instr, aco_opcode::v_cvt_u32_f64, dst);
} else {
- fprintf(stderr, "Unimplemented NIR instr bit size: ");
- nir_print_instr(&instr->instr, stderr);
- fprintf(stderr, "\n");
+ isel_err(&instr->instr, "Unimplemented NIR instr bit size");
}
break;
}
bld.pseudo(aco_opcode::p_create_vector, Definition(dst), lower, upper);
} else {
- fprintf(stderr, "Unimplemented NIR instr bit size: ");
- nir_print_instr(&instr->instr, stderr);
- fprintf(stderr, "\n");
+ isel_err(&instr->instr, "Unimplemented NIR instr bit size");
}
break;
}
bld.pseudo(aco_opcode::p_create_vector, Definition(dst), lower, upper);
} else {
- fprintf(stderr, "Unimplemented NIR instr bit size: ");
- nir_print_instr(&instr->instr, stderr);
- fprintf(stderr, "\n");
+ isel_err(&instr->instr, "Unimplemented NIR instr bit size");
}
break;
}
case nir_op_i2i16:
case nir_op_i2i32:
case nir_op_i2i64: {
- convert_int(ctx, bld, get_alu_src(ctx, instr->src[0]),
- instr->src[0].src.ssa->bit_size, instr->dest.dest.ssa.bit_size, true, dst);
+ if (dst.type() == RegType::sgpr && instr->src[0].src.ssa->bit_size < 32) {
+ /* no need to do the extract in get_alu_src() */
+ sgpr_extract_mode mode = instr->dest.dest.ssa.bit_size > instr->src[0].src.ssa->bit_size ?
+ sgpr_extract_sext : sgpr_extract_undef;
+ extract_8_16_bit_sgpr_element(ctx, dst, &instr->src[0], mode);
+ } else {
+ convert_int(ctx, bld, get_alu_src(ctx, instr->src[0]),
+ instr->src[0].src.ssa->bit_size, instr->dest.dest.ssa.bit_size, true, dst);
+ }
break;
}
case nir_op_u2u8:
case nir_op_u2u16:
case nir_op_u2u32:
case nir_op_u2u64: {
- convert_int(ctx, bld, get_alu_src(ctx, instr->src[0]),
- instr->src[0].src.ssa->bit_size, instr->dest.dest.ssa.bit_size, false, dst);
+ if (dst.type() == RegType::sgpr && instr->src[0].src.ssa->bit_size < 32) {
+ /* no need to do the extract in get_alu_src() */
+ sgpr_extract_mode mode = instr->dest.dest.ssa.bit_size > instr->src[0].src.ssa->bit_size ?
+ sgpr_extract_zext : sgpr_extract_undef;
+ extract_8_16_bit_sgpr_element(ctx, dst, &instr->src[0], mode);
+ } else {
+ convert_int(ctx, bld, get_alu_src(ctx, instr->src[0]),
+ instr->src[0].src.ssa->bit_size, instr->dest.dest.ssa.bit_size, false, dst);
+ }
break;
}
case nir_op_b2b32:
Temp src0 = bld.tmp(v1);
Temp src1 = bld.tmp(v1);
bld.pseudo(aco_opcode::p_split_vector, Definition(src0), Definition(src1), src);
- if (!ctx->block->fp_mode.care_about_round32 || ctx->block->fp_mode.round32 == fp_round_tz)
+ if (0 && (!ctx->block->fp_mode.care_about_round32 || ctx->block->fp_mode.round32 == fp_round_tz)) {
bld.vop3(aco_opcode::v_cvt_pkrtz_f16_f32, Definition(dst), src0, src1);
- else
- bld.vop3(aco_opcode::v_cvt_pk_u16_u32, Definition(dst),
- bld.vop1(aco_opcode::v_cvt_f32_f16, bld.def(v1), src0),
- bld.vop1(aco_opcode::v_cvt_f32_f16, bld.def(v1), src1));
+ } else {
+ src0 = bld.vop1(aco_opcode::v_cvt_f16_f32, bld.def(v1), src0);
+ src1 = bld.vop1(aco_opcode::v_cvt_f16_f32, bld.def(v1), src1);
+ if (ctx->program->chip_class >= GFX10) {
+ /* the high bits of v_cvt_f16_f32 isn't zero'd on GFX10 */
+ bld.vop3(aco_opcode::v_pack_b32_f16, Definition(dst), src0, src1);
+ } else {
+ bld.vop3(aco_opcode::v_cvt_pk_u16_u32, Definition(dst), src0, src1);
+ }
+ }
} else {
- fprintf(stderr, "Unimplemented NIR instr bit size: ");
- nir_print_instr(&instr->instr, stderr);
- fprintf(stderr, "\n");
+ isel_err(&instr->instr, "Unimplemented NIR instr bit size");
}
break;
}
if (dst.regClass() == v1) {
bld.vop1(aco_opcode::v_cvt_f32_f16, Definition(dst), get_alu_src(ctx, instr->src[0]));
} else {
- fprintf(stderr, "Unimplemented NIR instr bit size: ");
- nir_print_instr(&instr->instr, stderr);
- fprintf(stderr, "\n");
+ isel_err(&instr->instr, "Unimplemented NIR instr bit size");
}
break;
}
bld.vop1(aco_opcode::v_cvt_f32_f16, Definition(dst),
bld.vop2(aco_opcode::v_lshrrev_b32, bld.def(v1), Operand(16u), as_vgpr(ctx, get_alu_src(ctx, instr->src[0]))));
} else {
- fprintf(stderr, "Unimplemented NIR instr bit size: ");
- nir_print_instr(&instr->instr, stderr);
- fprintf(stderr, "\n");
+ isel_err(&instr->instr, "Unimplemented NIR instr bit size");
}
break;
}
} else if (dst.regClass() == v1) {
bld.vop3(aco_opcode::v_bfm_b32, Definition(dst), bits, offset);
} else {
- fprintf(stderr, "Unimplemented NIR instr bit size: ");
- nir_print_instr(&instr->instr, stderr);
- fprintf(stderr, "\n");
+ isel_err(&instr->instr, "Unimplemented NIR instr bit size");
}
break;
}
bld.vop3(aco_opcode::v_bfi_b32, Definition(dst), bitmask, insert, base);
} else {
- fprintf(stderr, "Unimplemented NIR instr bit size: ");
- nir_print_instr(&instr->instr, stderr);
- fprintf(stderr, "\n");
+ isel_err(&instr->instr, "Unimplemented NIR instr bit size");
}
break;
}
} else if (src.regClass() == s2) {
bld.sop1(aco_opcode::s_bcnt1_i32_b64, Definition(dst), bld.def(s1, scc), src);
} else {
- fprintf(stderr, "Unimplemented NIR instr bit size: ");
- nir_print_instr(&instr->instr, stderr);
- fprintf(stderr, "\n");
+ isel_err(&instr->instr, "Unimplemented NIR instr bit size");
}
break;
}
emit_comparison(ctx, instr, dst, aco_opcode::v_cmp_eq_f16, aco_opcode::v_cmp_eq_f32, aco_opcode::v_cmp_eq_f64);
break;
}
- case nir_op_fne: {
+ case nir_op_fneu: {
emit_comparison(ctx, instr, dst, aco_opcode::v_cmp_neq_f16, aco_opcode::v_cmp_neq_f32, aco_opcode::v_cmp_neq_f64);
break;
}
break;
}
default:
- fprintf(stderr, "Unknown NIR ALU instr: ");
- nir_print_instr(&instr->instr, stderr);
- fprintf(stderr, "\n");
+ isel_err(&instr->instr, "Unknown NIR ALU instr");
}
}
ctx->shader->info.stage == MESA_SHADER_GEOMETRY) {
bool stored_to_temps = store_output_to_temps(ctx, instr);
if (!stored_to_temps) {
- fprintf(stderr, "Unimplemented output offset instruction:\n");
- nir_print_instr(instr->src[1].ssa->parent_instr, stderr);
- fprintf(stderr, "\n");
+ isel_err(instr->src[1].ssa->parent_instr, "Unimplemented output offset instruction");
abort();
}
} else if (ctx->stage == vertex_es ||
nir_instr *off_instr = instr->src[0].ssa->parent_instr;
if (off_instr->type != nir_instr_type_load_const) {
- fprintf(stderr, "Unimplemented nir_intrinsic_load_input offset\n");
- nir_print_instr(off_instr, stderr);
- fprintf(stderr, "\n");
+ isel_err(off_instr, "Unimplemented nir_intrinsic_load_input offset");
}
uint32_t offset = nir_instr_as_load_const(off_instr)->value[0].u32;
nir_instr *off_instr = instr->src[offset_idx].ssa->parent_instr;
if (off_instr->type != nir_instr_type_load_const ||
nir_instr_as_load_const(off_instr)->value[0].u32 != 0) {
- fprintf(stderr, "Unimplemented nir_intrinsic_load_input offset\n");
- nir_print_instr(off_instr, stderr);
- fprintf(stderr, "\n");
+ isel_err(off_instr, "Unimplemented nir_intrinsic_load_input offset");
}
Temp prim_mask = get_arg(ctx, ctx->args->ac.prim_mask);
assert(nir_instr_is_last(&instr->instr));
ctx->block->kind |= block_kind_uniform;
ctx->cf_info.has_branch = true;
- bld.branch(aco_opcode::p_branch);
+ bld.branch(aco_opcode::p_branch, bld.hint_vcc(bld.def(s2)));
add_linear_edge(ctx->block->index, linear_target);
return;
}
ctx->cf_info.nir_to_aco[instr->instr.block->index] = idx;
/* remove critical edges from linear CFG */
- bld.branch(aco_opcode::p_branch);
+ bld.branch(aco_opcode::p_branch, bld.hint_vcc(bld.def(s2)));
Block* break_block = ctx->program->create_and_insert_block();
break_block->loop_nest_depth = ctx->cf_info.loop_nest_depth;
break_block->kind |= block_kind_uniform;
add_linear_edge(idx, break_block);
add_linear_edge(break_block->index, linear_target);
bld.reset(break_block);
- bld.branch(aco_opcode::p_branch);
+ bld.branch(aco_opcode::p_branch, bld.hint_vcc(bld.def(s2)));
Block* continue_block = ctx->program->create_and_insert_block();
continue_block->loop_nest_depth = ctx->cf_info.loop_nest_depth;
}
/* LOD */
+ assert(nir_src_as_uint(instr->src[1]) == 0);
Temp lod = bld.vop1(aco_opcode::v_mov_b32, bld.def(v1), Operand(0u));
/* Resource */
bool smem = !nir_src_is_divergent(instr->src[2]) &&
ctx->options->chip_class >= GFX8 &&
+ ctx->options->chip_class < GFX10_3 &&
(elem_size_bytes >= 4 || can_subdword_ssbo_store_use_smem(instr)) &&
allow_smem;
if (smem)
unreachable("invalid scope");
}
-void emit_memory_barrier(isel_context *ctx, nir_intrinsic_instr *instr) {
+void emit_scoped_barrier(isel_context *ctx, nir_intrinsic_instr *instr) {
Builder bld(ctx->program, ctx->block);
- storage_class all_mem = (storage_class)(storage_buffer | storage_image | storage_atomic_counter | storage_shared);
- switch(instr->intrinsic) {
- case nir_intrinsic_group_memory_barrier:
- bld.barrier(aco_opcode::p_barrier,
- memory_sync_info(all_mem, semantic_acqrel, scope_workgroup));
- break;
- case nir_intrinsic_memory_barrier:
- bld.barrier(aco_opcode::p_barrier,
- memory_sync_info(all_mem, semantic_acqrel, scope_device));
- break;
- case nir_intrinsic_memory_barrier_buffer:
- bld.barrier(aco_opcode::p_barrier,
- memory_sync_info((storage_class)storage_buffer, semantic_acqrel, scope_device));
- case nir_intrinsic_memory_barrier_image:
- bld.barrier(aco_opcode::p_barrier,
- memory_sync_info((storage_class)storage_image, semantic_acqrel, scope_device));
- break;
- case nir_intrinsic_memory_barrier_tcs_patch:
- case nir_intrinsic_memory_barrier_shared:
- bld.barrier(aco_opcode::p_barrier,
- memory_sync_info(storage_shared, semantic_acqrel, scope_workgroup));
- break;
- case nir_intrinsic_scoped_barrier: {
- unsigned semantics = 0;
- unsigned storage = 0;
- sync_scope mem_scope = translate_nir_scope(nir_intrinsic_memory_scope(instr));
- sync_scope exec_scope = translate_nir_scope(nir_intrinsic_execution_scope(instr));
-
- unsigned nir_storage = nir_intrinsic_memory_modes(instr);
- if (nir_storage & (nir_var_mem_ssbo | nir_var_mem_global))
- storage |= storage_buffer | storage_image; //TODO: split this when NIR gets nir_var_mem_image
- if (ctx->shader->info.stage == MESA_SHADER_COMPUTE && (nir_storage & nir_var_mem_shared))
- storage |= storage_shared;
- if (ctx->shader->info.stage == MESA_SHADER_TESS_CTRL && (nir_storage & nir_var_shader_out))
- storage |= storage_shared;
-
- unsigned nir_semantics = nir_intrinsic_memory_semantics(instr);
- if (nir_semantics & NIR_MEMORY_ACQUIRE)
- semantics |= semantic_acquire | semantic_release;
- if (nir_semantics & NIR_MEMORY_RELEASE)
- semantics |= semantic_acquire | semantic_release;
-
- assert(!(nir_semantics & (NIR_MEMORY_MAKE_AVAILABLE | NIR_MEMORY_MAKE_VISIBLE)));
- bld.barrier(aco_opcode::p_barrier,
- memory_sync_info((storage_class)storage, (memory_semantics)semantics, mem_scope),
- exec_scope);
- break;
- }
- default:
- unreachable("Unimplemented memory barrier intrinsic");
- break;
- }
+ unsigned semantics = 0;
+ unsigned storage = 0;
+ sync_scope mem_scope = translate_nir_scope(nir_intrinsic_memory_scope(instr));
+ sync_scope exec_scope = translate_nir_scope(nir_intrinsic_execution_scope(instr));
+
+ unsigned nir_storage = nir_intrinsic_memory_modes(instr);
+ if (nir_storage & (nir_var_mem_ssbo | nir_var_mem_global))
+ storage |= storage_buffer | storage_image; //TODO: split this when NIR gets nir_var_mem_image
+ if (ctx->shader->info.stage == MESA_SHADER_COMPUTE && (nir_storage & nir_var_mem_shared))
+ storage |= storage_shared;
+ if (ctx->shader->info.stage == MESA_SHADER_TESS_CTRL && (nir_storage & nir_var_shader_out))
+ storage |= storage_shared;
+
+ unsigned nir_semantics = nir_intrinsic_memory_semantics(instr);
+ if (nir_semantics & NIR_MEMORY_ACQUIRE)
+ semantics |= semantic_acquire | semantic_release;
+ if (nir_semantics & NIR_MEMORY_RELEASE)
+ semantics |= semantic_acquire | semantic_release;
+
+ assert(!(nir_semantics & (NIR_MEMORY_MAKE_AVAILABLE | NIR_MEMORY_MAKE_VISIBLE)));
+
+ bld.barrier(aco_opcode::p_barrier,
+ memory_sync_info((storage_class)storage, (memory_semantics)semantics, mem_scope),
+ exec_scope);
}
void visit_load_shared(isel_context *ctx, nir_intrinsic_instr *instr)
} else if (src.regClass() == s2) {
bld.sop1(aco_opcode::s_mov_b64, dst, src);
} else {
- fprintf(stderr, "Unimplemented NIR instr bit size: ");
- nir_print_instr(&instr->instr, stderr);
- fprintf(stderr, "\n");
+ isel_err(&instr->instr, "Unimplemented NIR instr bit size");
}
}
}
/* res_k = p_k + ddx_k * pos1 + ddy_k * pos2 */
- Temp tmp1 = bld.vop3(aco_opcode::v_mad_f32, bld.def(v1), ddx_1, pos1, p1);
- Temp tmp2 = bld.vop3(aco_opcode::v_mad_f32, bld.def(v1), ddx_2, pos1, p2);
- tmp1 = bld.vop3(aco_opcode::v_mad_f32, bld.def(v1), ddy_1, pos2, tmp1);
- tmp2 = bld.vop3(aco_opcode::v_mad_f32, bld.def(v1), ddy_2, pos2, tmp2);
+ aco_opcode mad = ctx->program->chip_class >= GFX10_3 ? aco_opcode::v_fma_f32 : aco_opcode::v_mad_f32;
+ Temp tmp1 = bld.vop3(mad, bld.def(v1), ddx_1, pos1, p1);
+ Temp tmp2 = bld.vop3(mad, bld.def(v1), ddx_2, pos1, p2);
+ tmp1 = bld.vop3(mad, bld.def(v1), ddy_1, pos2, tmp1);
+ tmp2 = bld.vop3(mad, bld.def(v1), ddy_2, pos2, tmp2);
Temp wqm1 = bld.tmp(v1);
emit_wqm(ctx, tmp1, wqm1, true);
Temp wqm2 = bld.tmp(v1);
case nir_intrinsic_get_buffer_size:
visit_get_buffer_size(ctx, instr);
break;
- case nir_intrinsic_control_barrier: {
- bld.barrier(aco_opcode::p_barrier, memory_sync_info(0, 0, scope_invocation), scope_workgroup);
- break;
- }
- case nir_intrinsic_memory_barrier_tcs_patch:
- case nir_intrinsic_group_memory_barrier:
- case nir_intrinsic_memory_barrier:
- case nir_intrinsic_memory_barrier_buffer:
- case nir_intrinsic_memory_barrier_image:
- case nir_intrinsic_memory_barrier_shared:
case nir_intrinsic_scoped_barrier:
- emit_memory_barrier(ctx, instr);
+ emit_scoped_barrier(ctx, instr);
break;
case nir_intrinsic_load_num_work_groups: {
Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
} else if (instr->src[0].ssa->bit_size == 64 && src.regClass() == v2) {
bld.vopc(aco_opcode::v_cmp_lg_u64, lanemask_tmp, Operand(0u), src);
} else {
- fprintf(stderr, "Unimplemented NIR instr bit size: ");
- nir_print_instr(&instr->instr, stderr);
- fprintf(stderr, "\n");
+ isel_err(&instr->instr, "Unimplemented NIR instr bit size");
}
if (dst.size() != bld.lm.size()) {
/* Wave32 with ballot size set to 64 */
tmp = bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand(1u), tmp);
emit_wqm(ctx, bld.vopc(aco_opcode::v_cmp_lg_u32, bld.def(bld.lm), Operand(0u), tmp), dst);
} else {
- fprintf(stderr, "Unimplemented NIR instr bit size: ");
- nir_print_instr(&instr->instr, stderr);
- fprintf(stderr, "\n");
+ isel_err(&instr->instr, "Unimplemented NIR instr bit size");
}
}
break;
} else if (src.regClass() == s2) {
bld.pseudo(aco_opcode::p_create_vector, Definition(dst), src);
} else {
- fprintf(stderr, "Unimplemented NIR instr bit size: ");
- nir_print_instr(&instr->instr, stderr);
- fprintf(stderr, "\n");
+ isel_err(&instr->instr, "Unimplemented NIR instr bit size");
}
break;
}
bld.pseudo(aco_opcode::p_create_vector, Definition(dst), lo, hi);
emit_split_vector(ctx, dst, 2);
} else {
- fprintf(stderr, "Unimplemented NIR instr bit size: ");
- nir_print_instr(&instr->instr, stderr);
- fprintf(stderr, "\n");
+ isel_err(&instr->instr, "Unimplemented NIR instr bit size");
}
}
break;
bld.pseudo(aco_opcode::p_create_vector, Definition(dst), lo, hi);
emit_split_vector(ctx, dst, 2);
} else {
- fprintf(stderr, "Unimplemented NIR instr bit size: ");
- nir_print_instr(&instr->instr, stderr);
- fprintf(stderr, "\n");
+ isel_err(&instr->instr, "Unimplemented NIR instr bit size");
}
break;
}
bld.pseudo(aco_opcode::p_create_vector, Definition(dst), lo, hi);
emit_split_vector(ctx, dst, 2);
} else {
- fprintf(stderr, "Unimplemented NIR instr bit size: ");
- nir_print_instr(&instr->instr, stderr);
- fprintf(stderr, "\n");
+ isel_err(&instr->instr, "Unimplemented NIR instr bit size");
}
break;
}
bld.pseudo(aco_opcode::p_create_vector, Definition(dst), lo, hi);
emit_split_vector(ctx, dst, 2);
} else {
- fprintf(stderr, "Unimplemented NIR instr bit size: ");
- nir_print_instr(&instr->instr, stderr);
- fprintf(stderr, "\n");
+ isel_err(&instr->instr, "Unimplemented NIR instr bit size");
}
break;
}
break;
}
case nir_intrinsic_shader_clock: {
- aco_opcode opcode =
- nir_intrinsic_memory_scope(instr) == NIR_SCOPE_DEVICE ?
- aco_opcode::s_memrealtime : aco_opcode::s_memtime;
- bld.smem(opcode, Definition(get_ssa_temp(ctx, &instr->dest.ssa)), memory_sync_info(0, semantic_volatile));
- emit_split_vector(ctx, get_ssa_temp(ctx, &instr->dest.ssa), 2);
+ Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
+ if (nir_intrinsic_memory_scope(instr) == NIR_SCOPE_SUBGROUP && ctx->options->chip_class >= GFX10_3) {
+ /* "((size - 1) << 11) | register" (SHADER_CYCLES is encoded as register 29) */
+ Temp clock = bld.sopk(aco_opcode::s_getreg_b32, bld.def(s1), ((20 - 1) << 11) | 29);
+ bld.pseudo(aco_opcode::p_create_vector, Definition(dst), clock, Operand(0u));
+ } else {
+ aco_opcode opcode =
+ nir_intrinsic_memory_scope(instr) == NIR_SCOPE_DEVICE ?
+ aco_opcode::s_memrealtime : aco_opcode::s_memtime;
+ bld.smem(opcode, Definition(dst), memory_sync_info(0, semantic_volatile));
+ }
+ emit_split_vector(ctx, dst, 2);
break;
}
case nir_intrinsic_load_vertex_id_zero_base: {
break;
}
default:
- fprintf(stderr, "Unimplemented intrinsic instr: ");
- nir_print_instr(&instr->instr, stderr);
- fprintf(stderr, "\n");
+ isel_err(&instr->instr, "Unimplemented intrinsic instr");
abort();
break;
{
Builder bld(ctx->program, ctx->block);
Temp ma, tc, sc, id;
+ aco_opcode madak = ctx->program->chip_class >= GFX10_3 ? aco_opcode::v_fmaak_f32 : aco_opcode::v_madak_f32;
+ aco_opcode madmk = ctx->program->chip_class >= GFX10_3 ? aco_opcode::v_fmamk_f32 : aco_opcode::v_madmk_f32;
if (is_array) {
coords[3] = bld.vop1(aco_opcode::v_rndne_f32, bld.def(v1), coords[3]);
sc = bld.vop3(aco_opcode::v_cubesc_f32, bld.def(v1), coords[0], coords[1], coords[2]);
if (!is_deriv)
- sc = bld.vop2(aco_opcode::v_madak_f32, bld.def(v1), sc, invma, Operand(0x3fc00000u/*1.5*/));
+ sc = bld.vop2(madak, bld.def(v1), sc, invma, Operand(0x3fc00000u/*1.5*/));
tc = bld.vop3(aco_opcode::v_cubetc_f32, bld.def(v1), coords[0], coords[1], coords[2]);
if (!is_deriv)
- tc = bld.vop2(aco_opcode::v_madak_f32, bld.def(v1), tc, invma, Operand(0x3fc00000u/*1.5*/));
+ tc = bld.vop2(madak, bld.def(v1), tc, invma, Operand(0x3fc00000u/*1.5*/));
id = bld.vop3(aco_opcode::v_cubeid_f32, bld.def(v1), coords[0], coords[1], coords[2]);
}
if (is_array)
- id = bld.vop2(aco_opcode::v_madmk_f32, bld.def(v1), coords[3], id, Operand(0x41000000u/*8.0*/));
+ id = bld.vop2(madmk, bld.def(v1), coords[3], id, Operand(0x41000000u/*8.0*/));
coords.resize(3);
coords[0] = sc;
coords[1] = tc;
/* uniform break - directly jump out of the loop */
ctx->block->kind |= block_kind_uniform;
ctx->cf_info.has_branch = true;
- bld.branch(aco_opcode::p_branch);
+ bld.branch(aco_opcode::p_branch, bld.hint_vcc(bld.def(s2)));
add_linear_edge(idx, logical_target);
return;
}
/* uniform continue - directly jump to the loop header */
ctx->block->kind |= block_kind_uniform;
ctx->cf_info.has_branch = true;
- bld.branch(aco_opcode::p_branch);
+ bld.branch(aco_opcode::p_branch, bld.hint_vcc(bld.def(s2)));
add_linear_edge(idx, logical_target);
return;
}
break;
default:
- fprintf(stderr, "Unknown NIR jump instr: ");
- nir_print_instr(&instr->instr, stderr);
- fprintf(stderr, "\n");
+ isel_err(&instr->instr, "Unknown NIR jump instr");
abort();
}
}
/* remove critical edges from linear CFG */
- bld.branch(aco_opcode::p_branch);
+ bld.branch(aco_opcode::p_branch, bld.hint_vcc(bld.def(s2)));
Block* break_block = ctx->program->create_and_insert_block();
break_block->loop_nest_depth = ctx->cf_info.loop_nest_depth;
break_block->kind |= block_kind_uniform;
logical_target = &ctx->program->blocks[ctx->cf_info.parent_loop.header_idx];
add_linear_edge(break_block->index, logical_target);
bld.reset(break_block);
- bld.branch(aco_opcode::p_branch);
+ bld.branch(aco_opcode::p_branch, bld.hint_vcc(bld.def(s2)));
Block* continue_block = ctx->program->create_and_insert_block();
continue_block->loop_nest_depth = ctx->cf_info.loop_nest_depth;
visit_jump(ctx, nir_instr_as_jump(instr));
break;
default:
- fprintf(stderr, "Unknown NIR instr type: ");
- nir_print_instr(instr, stderr);
- fprintf(stderr, "\n");
+ isel_err(instr, "Unknown NIR instr type");
//abort();
}
}
append_logical_end(ctx->block);
ctx->block->kind |= block_kind_loop_preheader | block_kind_uniform;
Builder bld(ctx->program, ctx->block);
- bld.branch(aco_opcode::p_branch);
+ bld.branch(aco_opcode::p_branch, bld.hint_vcc(bld.def(s2)));
unsigned loop_preheader_idx = ctx->block->index;
Block loop_exit = Block();
break_block->loop_nest_depth = ctx->cf_info.loop_nest_depth;
break_block->kind = block_kind_uniform;
bld.reset(break_block);
- bld.branch(aco_opcode::p_branch);
+ bld.branch(aco_opcode::p_branch, bld.hint_vcc(bld.def(s2)));
add_linear_edge(block_idx, break_block);
add_linear_edge(break_block->index, &loop_exit);
continue_block->loop_nest_depth = ctx->cf_info.loop_nest_depth;
continue_block->kind = block_kind_uniform;
bld.reset(continue_block);
- bld.branch(aco_opcode::p_branch);
+ bld.branch(aco_opcode::p_branch, bld.hint_vcc(bld.def(s2)));
add_linear_edge(block_idx, continue_block);
add_linear_edge(continue_block->index, &ctx->program->blocks[loop_header_idx]);
}
bld.reset(ctx->block);
- bld.branch(aco_opcode::p_branch);
+ bld.branch(aco_opcode::p_branch, bld.hint_vcc(bld.def(s2)));
}
/* Fixup phis in loop header from unreachable blocks.
/* branch to linear then block */
assert(cond.regClass() == ctx->program->lane_mask);
aco_ptr<Pseudo_branch_instruction> branch;
- branch.reset(create_instruction<Pseudo_branch_instruction>(aco_opcode::p_cbranch_z, Format::PSEUDO_BRANCH, 1, 0));
+ branch.reset(create_instruction<Pseudo_branch_instruction>(aco_opcode::p_cbranch_z, Format::PSEUDO_BRANCH, 1, 1));
+ branch->definitions[0] = {ctx->program->allocateId(), s2};
+ branch->definitions[0].setHint(vcc);
branch->operands[0] = Operand(cond);
ctx->block->instructions.push_back(std::move(branch));
append_logical_end(BB_then_logical);
/* branch from logical then block to invert block */
aco_ptr<Pseudo_branch_instruction> branch;
- branch.reset(create_instruction<Pseudo_branch_instruction>(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 0));
+ branch.reset(create_instruction<Pseudo_branch_instruction>(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 1));
+ branch->definitions[0] = {ctx->program->allocateId(), s2};
+ branch->definitions[0].setHint(vcc);
BB_then_logical->instructions.emplace_back(std::move(branch));
add_linear_edge(BB_then_logical->index, &ic->BB_invert);
if (!ctx->cf_info.parent_loop.has_divergent_branch)
BB_then_linear->kind |= block_kind_uniform;
add_linear_edge(ic->BB_if_idx, BB_then_linear);
/* branch from linear then block to invert block */
- branch.reset(create_instruction<Pseudo_branch_instruction>(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 0));
+ branch.reset(create_instruction<Pseudo_branch_instruction>(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 1));
+ branch->definitions[0] = {ctx->program->allocateId(), s2};
+ branch->definitions[0].setHint(vcc);
BB_then_linear->instructions.emplace_back(std::move(branch));
add_linear_edge(BB_then_linear->index, &ic->BB_invert);
ic->invert_idx = ctx->block->index;
/* branch to linear else block (skip else) */
- branch.reset(create_instruction<Pseudo_branch_instruction>(aco_opcode::p_cbranch_nz, Format::PSEUDO_BRANCH, 1, 0));
+ branch.reset(create_instruction<Pseudo_branch_instruction>(aco_opcode::p_cbranch_nz, Format::PSEUDO_BRANCH, 1, 1));
+ branch->definitions[0] = {ctx->program->allocateId(), s2};
+ branch->definitions[0].setHint(vcc);
branch->operands[0] = Operand(ic->cond);
ctx->block->instructions.push_back(std::move(branch));
/* branch from logical else block to endif block */
aco_ptr<Pseudo_branch_instruction> branch;
- branch.reset(create_instruction<Pseudo_branch_instruction>(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 0));
+ branch.reset(create_instruction<Pseudo_branch_instruction>(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 1));
+ branch->definitions[0] = {ctx->program->allocateId(), s2};
+ branch->definitions[0].setHint(vcc);
BB_else_logical->instructions.emplace_back(std::move(branch));
add_linear_edge(BB_else_logical->index, &ic->BB_endif);
if (!ctx->cf_info.parent_loop.has_divergent_branch)
add_linear_edge(ic->invert_idx, BB_else_linear);
/* branch from linear else block to endif block */
- branch.reset(create_instruction<Pseudo_branch_instruction>(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 0));
+ branch.reset(create_instruction<Pseudo_branch_instruction>(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 1));
+ branch->definitions[0] = {ctx->program->allocateId(), s2};
+ branch->definitions[0].setHint(vcc);
BB_else_linear->instructions.emplace_back(std::move(branch));
add_linear_edge(BB_else_linear->index, &ic->BB_endif);
aco_ptr<Pseudo_branch_instruction> branch;
aco_opcode branch_opcode = aco_opcode::p_cbranch_z;
- branch.reset(create_instruction<Pseudo_branch_instruction>(branch_opcode, Format::PSEUDO_BRANCH, 1, 0));
+ branch.reset(create_instruction<Pseudo_branch_instruction>(branch_opcode, Format::PSEUDO_BRANCH, 1, 1));
+ branch->definitions[0] = {ctx->program->allocateId(), s2};
+ branch->definitions[0].setHint(vcc);
branch->operands[0] = Operand(cond);
branch->operands[0].setFixed(scc);
ctx->block->instructions.emplace_back(std::move(branch));
append_logical_end(BB_then);
/* branch from then block to endif block */
aco_ptr<Pseudo_branch_instruction> branch;
- branch.reset(create_instruction<Pseudo_branch_instruction>(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 0));
+ branch.reset(create_instruction<Pseudo_branch_instruction>(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 1));
+ branch->definitions[0] = {ctx->program->allocateId(), s2};
+ branch->definitions[0].setHint(vcc);
BB_then->instructions.emplace_back(std::move(branch));
add_linear_edge(BB_then->index, &ic->BB_endif);
if (!ic->then_branch_divergent)
append_logical_end(BB_else);
/* branch from then block to endif block */
aco_ptr<Pseudo_branch_instruction> branch;
- branch.reset(create_instruction<Pseudo_branch_instruction>(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 0));
+ branch.reset(create_instruction<Pseudo_branch_instruction>(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 1));
+ branch->definitions[0] = {ctx->program->allocateId(), s2};
+ branch->definitions[0].setHint(vcc);
BB_else->instructions.emplace_back(std::move(branch));
add_linear_edge(BB_else->index, &ic->BB_endif);
if (!ctx->cf_info.parent_loop.has_divergent_branch)
else
exp->operands[i] = Operand(v1);
}
- /* Navi10-14 skip POS0 exports if EXEC=0 and DONE=0, causing a hang.
+ /* GFX10 (Navi1x) skip POS0 exports if EXEC=0 and DONE=0, causing a hang.
* Setting valid_mask=1 prevents it and has no other effect.
*/
- exp->valid_mask = ctx->options->chip_class >= GFX10 && is_pos && *next_pos == 0;
+ exp->valid_mask = ctx->options->chip_class == GFX10 && is_pos && *next_pos == 0;
exp->done = false;
exp->compressed = false;
if (is_pos)
exp->enabled_mask |= 0x4;
}
}
- exp->valid_mask = ctx->options->chip_class >= GFX10 && *next_pos == 0;
+ exp->valid_mask = ctx->options->chip_class == GFX10 && *next_pos == 0;
exp->done = false;
exp->compressed = false;
exp->dest = V_008DFC_SQ_EXP_POS + (*next_pos)++;
Temp cond = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), stream_id, Operand(stream));
append_logical_end(ctx.block);
ctx.block->kind |= block_kind_uniform;
- bld.branch(aco_opcode::p_cbranch_z, cond);
+ bld.branch(aco_opcode::p_cbranch_z, bld.hint_vcc(bld.def(s2)), cond);
BB_endif.kind |= ctx.block->kind & block_kind_top_level;
append_logical_end(ctx.block);
/* branch from then block to endif block */
- bld.branch(aco_opcode::p_branch);
+ bld.branch(aco_opcode::p_branch, bld.hint_vcc(bld.def(s2)));
add_edge(ctx.block->index, &BB_endif);
ctx.block->kind |= block_kind_uniform;
append_logical_end(BB_else);
/* branch from else block to endif block */
- bld.branch(aco_opcode::p_branch);
+ bld.branch(aco_opcode::p_branch, bld.hint_vcc(bld.def(s2)));
add_edge(BB_else->index, &BB_endif);
BB_else->kind |= block_kind_uniform;
cleanup_cfg(program);
}
+
+void select_trap_handler_shader(Program *program, struct nir_shader *shader,
+ ac_shader_config* config,
+ struct radv_shader_args *args)
+{
+ assert(args->options->chip_class == GFX8);
+
+ init_program(program, compute_cs, args->shader_info,
+ args->options->chip_class, args->options->family, config);
+
+ isel_context ctx = {};
+ ctx.program = program;
+ ctx.args = args;
+ ctx.options = args->options;
+ ctx.stage = program->stage;
+
+ ctx.block = ctx.program->create_and_insert_block();
+ ctx.block->loop_nest_depth = 0;
+ ctx.block->kind = block_kind_top_level;
+
+ program->workgroup_size = 1; /* XXX */
+
+ add_startpgm(&ctx);
+ append_logical_start(ctx.block);
+
+ Builder bld(ctx.program, ctx.block);
+
+ /* Load the buffer descriptor from TMA. */
+ bld.smem(aco_opcode::s_load_dwordx4, Definition(PhysReg{ttmp4}, s4),
+ Operand(PhysReg{tma}, s2), Operand(0u));
+
+ /* Store TTMP0-TTMP1. */
+ bld.smem(aco_opcode::s_buffer_store_dwordx2, Operand(PhysReg{ttmp4}, s4),
+ Operand(0u), Operand(PhysReg{ttmp0}, s2), memory_sync_info(), true);
+
+ uint32_t hw_regs_idx[] = {
+ 2, /* HW_REG_STATUS */
+ 3, /* HW_REG_TRAP_STS */
+ 4, /* HW_REG_HW_ID */
+ 7, /* HW_REG_IB_STS */
+ };
+
+ /* Store some hardware registers. */
+ for (unsigned i = 0; i < ARRAY_SIZE(hw_regs_idx); i++) {
+ /* "((size - 1) << 11) | register" */
+ bld.sopk(aco_opcode::s_getreg_b32, Definition(PhysReg{ttmp8}, s1),
+ ((20 - 1) << 11) | hw_regs_idx[i]);
+
+ bld.smem(aco_opcode::s_buffer_store_dword, Operand(PhysReg{ttmp4}, s4),
+ Operand(8u + i * 4), Operand(PhysReg{ttmp8}, s1), memory_sync_info(), true);
+ }
+
+ program->config->float_mode = program->blocks[0].fp_mode.val;
+
+ append_logical_end(ctx.block);
+ ctx.block->kind |= block_kind_uniform;
+ bld.sopp(aco_opcode::s_endpgm);
+
+ cleanup_cfg(program);
+}
}