+/* Emits the instructions that convert the integer held in `src` from
+ * `src_bits` to `dst_bits` wide and returns the temporary holding the result.
+ *
+ * ctx       - instruction-selection context; only its chip_class option is read
+ * bld       - builder used to allocate temporaries and emit every instruction
+ * src       - value to convert
+ * src_bits  - width of the source integer, in bits
+ * dst_bits  - width of the requested result, in bits
+ * is_signed - when widening, sign-extend if true and zero-extend otherwise
+ * dst       - optional destination; if its id is zero a new temporary is
+ *             allocated (src's register type for dword multiples or SGPR
+ *             sources, a sub-dword VGPR class otherwise)
+ */
+Temp convert_int(isel_context *ctx, Builder& bld, Temp src, unsigned src_bits, unsigned dst_bits, bool is_signed, Temp dst=Temp())
+{
+   /* The caller gave no destination: choose a register class and allocate one. */
+   if (!dst.id()) {
+      const bool keep_src_type = dst_bits % 32 == 0 || src.type() == RegType::sgpr;
+      if (keep_src_type)
+         dst = bld.tmp(src.type(), DIV_ROUND_UP(dst_bits, 32u));
+      else
+         dst = bld.tmp(RegClass(RegType::vgpr, dst_bits / 8u).as_subdword());
+   }
+
+   /* Narrowing never needs an extension: either the registers have the same
+    * byte size and a copy is enough, or the low part of src is extracted. */
+   const bool same_byte_size = dst.bytes() == src.bytes();
+   if (same_byte_size && dst_bits < src_bits)
+      return bld.copy(Definition(dst), src);
+   if (dst.bytes() < src.bytes())
+      return bld.pseudo(aco_opcode::p_extract_vector, Definition(dst), src, Operand(0u));
+
+   /* `low` receives the (up to) 32-bit extended value. For a 64-bit result it
+    * is only the low dword: src itself when src is already 32 bits wide,
+    * otherwise a new single-dword temporary of src's register type. */
+   const bool to_64bit = dst_bits == 64;
+   Temp low;
+   if (!to_64bit)
+      low = dst;
+   else if (src_bits == 32)
+      low = src;
+   else
+      low = bld.tmp(src.type(), 1);
+
+   /* Nothing to emit when the low dword is src itself. */
+   if (!(low == src)) {
+      if (src.regClass() == s1) {
+         /* Scalar source: dedicated sign-extension opcodes, or a mask for zero-extension. */
+         if (is_signed) {
+            const aco_opcode sext_op = src_bits == 8 ? aco_opcode::s_sext_i32_i8
+                                                     : aco_opcode::s_sext_i32_i16;
+            bld.sop1(sext_op, Definition(low), src);
+         } else {
+            const unsigned keep_mask = src_bits == 8 ? 0xFFu : 0xFFFFu;
+            bld.sop2(aco_opcode::s_and_b32, Definition(low), bld.def(s1, scc), Operand(keep_mask), src);
+         }
+      } else if (ctx->options->chip_class >= GFX8) {
+         /* GFX8+: a v_mov_b32 in SDWA form, with the extension kind encoded in
+          * the source select. */
+         assert(src_bits != 8 || src.regClass() == v1b);
+         assert(src_bits != 16 || src.regClass() == v2b);
+         aco_ptr<SDWA_instruction> mov{create_instruction<SDWA_instruction>(aco_opcode::v_mov_b32, asSDWA(Format::VOP1), 1, 1)};
+         mov->operands[0] = Operand(src);
+         mov->definitions[0] = Definition(low);
+         mov->sel[0] = src_bits == 8 ? (is_signed ? sdwa_sbyte : sdwa_ubyte)
+                                     : (is_signed ? sdwa_sword : sdwa_uword);
+         mov->dst_sel = low.bytes() == 2 ? sdwa_uword : sdwa_udword;
+         bld.insert(std::move(mov));
+      } else {
+         /* GFX6/GFX7: bit-field extract starting at bit 0, src_bits wide. */
+         assert(ctx->options->chip_class == GFX6 || ctx->options->chip_class == GFX7);
+         const unsigned field_width = src_bits == 8 ? 8u : 16u;
+         const aco_opcode bfe_op = is_signed ? aco_opcode::v_bfe_i32 : aco_opcode::v_bfe_u32;
+         bld.vop3(bfe_op, Definition(low), src, Operand(0u), Operand(field_width));
+      }
+   }
+
+   /* A 64-bit result is the low dword paired with a high dword: the low dword
+    * arithmetically shifted right by 31 for signed s2/v2 destinations, and a
+    * constant zero in every other case. */
+   if (to_64bit) {
+      const bool sign_fill_sgpr = is_signed && dst.regClass() == s2;
+      const bool sign_fill_vgpr = is_signed && dst.regClass() == v2;
+      if (sign_fill_sgpr) {
+         Temp high = bld.sop2(aco_opcode::s_ashr_i32, bld.def(s1), bld.def(s1, scc), low, Operand(31u));
+         bld.pseudo(aco_opcode::p_create_vector, Definition(dst), low, high);
+      } else if (sign_fill_vgpr) {
+         Temp high = bld.vop2(aco_opcode::v_ashrrev_i32, bld.def(v1), Operand(31u), low);
+         bld.pseudo(aco_opcode::p_create_vector, Definition(dst), low, high);
+      } else {
+         bld.pseudo(aco_opcode::p_create_vector, Definition(dst), low, Operand(0u));
+      }
+   }
+
+   return dst;
+}
+
+enum sgpr_extract_mode { /* How extract_8_16_bit_sgpr_element() treats the bits above the extracted element. */
+ sgpr_extract_sext, /* sign-extend: s_sext_i32_i8/i16 or s_bfe_i32, and a signed widening for s2 destinations */
+ sgpr_extract_zext, /* zero-extend: s_bfe_u32, or a plain s_lshr_b32 when the bit offset is 24 */
+ sgpr_extract_undef, /* upper bits are not cleaned up: only a copy or an s_lshr_b32 is emitted */
+};
+
+/* Extracts the 8- or 16-bit element selected by src->swizzle[0] from the
+ * scalar value behind `src`, writes it to `dst` and returns `dst`.
+ *
+ * ctx  - instruction-selection context (supplies the program and current block)
+ * dst  - destination temporary; for an s2 destination the element is first
+ *        produced in a new s1 temporary and then widened with convert_int()
+ * src  - NIR ALU source; its SSA bit size is the element size and swizzle[0]
+ *        is the element index
+ * mode - how the bits above the element are filled (see sgpr_extract_mode)
+ */
+Temp extract_8_16_bit_sgpr_element(isel_context *ctx, Temp dst, nir_alu_src *src, sgpr_extract_mode mode)
+{
+   Temp vec = get_ssa_temp(ctx, src->src.ssa);
+   unsigned src_size = src->src.ssa->bit_size;
+   unsigned swizzle = src->swizzle[0];
+
+   /* Multi-dword source: narrow to the dword containing the element. Only
+    * 16-bit elements are expected here, two per dword. */
+   if (vec.size() > 1) {
+      assert(src_size == 16);
+      vec = emit_extract_vector(ctx, vec, swizzle / 2, s1);
+      swizzle = swizzle & 1;
+   }
+
+   Builder bld(ctx->program, ctx->block);
+   unsigned offset = src_size * swizzle;
+
+   /* 64-bit destinations get their low dword in a scratch s1 first. */
+   const bool wide_dst = dst.regClass() == s2;
+   Temp tmp = wide_dst ? bld.tmp(s1) : dst;
+
+   const bool want_sext = mode == sgpr_extract_sext;
+   const bool want_zext = mode == sgpr_extract_zext;
+   const bool want_undef = mode == sgpr_extract_undef;
+   const bool first_element = swizzle == 0;
+
+   if (want_undef && first_element) {
+      /* Element already sits in the low bits and the rest may hold anything. */
+      bld.copy(Definition(tmp), vec);
+   } else if (want_undef || (want_zext && offset == 24)) {
+      /* A right shift suffices: either the upper bits do not matter, or the
+       * shift amount is 24 and only the top byte remains. */
+      bld.sop2(aco_opcode::s_lshr_b32, Definition(tmp), bld.def(s1, scc), vec, Operand(offset));
+   } else if (want_sext && first_element && (src_size == 8 || src_size == 16)) {
+      /* Sign-extending the lowest element has dedicated opcodes. */
+      const aco_opcode sext_op = src_size == 8 ? aco_opcode::s_sext_i32_i8
+                                               : aco_opcode::s_sext_i32_i16;
+      bld.sop1(sext_op, Definition(tmp), vec);
+   } else {
+      /* General case: bit-field extract; the second operand carries the field
+       * width shifted left by 16, OR-ed with the bit offset. */
+      const aco_opcode bfe_op = want_zext ? aco_opcode::s_bfe_u32 : aco_opcode::s_bfe_i32;
+      const unsigned field_desc = (src_size << 16) | offset;
+      bld.sop2(bfe_op, Definition(tmp), bld.def(s1, scc), vec, Operand(field_desc));
+   }
+
+   /* Widen the scratch dword into the real 64-bit destination. */
+   if (wide_dst)
+      convert_int(ctx, bld, tmp, 32, 64, want_sext, dst);
+
+   return dst;
+}
+