if (instr->intrinsic == nir_intrinsic_read_invocation || !nir_src_is_divergent(instr->src[1]))
tid = bld.as_uniform(tid);
Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
- if (src.regClass() == v1) {
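+ /* Sub-dword (8/16-bit) sources: bpermute shuffles full dwords, so permute in a v1 temporary and extract the low bits afterwards. */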
+ if (src.regClass() == v1b || src.regClass() == v2b) {
+ Temp tmp = bld.tmp(v1);
+ tmp = emit_wqm(ctx, emit_bpermute(ctx, bld, tid, src), tmp);
+ if (dst.type() == RegType::vgpr)
+ bld.pseudo(aco_opcode::p_split_vector, Definition(dst), bld.def(src.regClass() == v1b ? v3b : v2b), tmp);
+ else
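+ /* SGPR destinations hold sub-dword values in a full register, so no split is needed. */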
+ bld.pseudo(aco_opcode::p_as_uniform, Definition(dst), tmp);
+ } else if (src.regClass() == v1) {
emit_wqm(ctx, emit_bpermute(ctx, bld, tid, src), dst);
} else if (src.regClass() == v2) {
   Temp lo = bld.tmp(v1), hi = bld.tmp(v1);
   bld.pseudo(aco_opcode::p_split_vector, Definition(lo), Definition(hi), src);
   lo = emit_wqm(ctx, emit_bpermute(ctx, bld, tid, lo));
   hi = emit_wqm(ctx, emit_bpermute(ctx, bld, tid, hi));
   bld.pseudo(aco_opcode::p_create_vector, Definition(dst), lo, hi);
}

if (instr->dest.ssa.bit_size == 1) {
   Temp tmp = bld.tmp(bld.lm);
   bld.sop1(Builder::s_wqm, Definition(tmp),
            bld.sop2(Builder::s_and, bld.def(bld.lm), bld.def(s1, scc), mask_tmp,
                     bld.sop2(Builder::s_and, bld.def(bld.lm), bld.def(s1, scc), src, Operand(exec, bld.lm))));
   emit_wqm(ctx, tmp, dst);
+ } else if (instr->dest.ssa.bit_size == 8) {
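+ /* DPP moves whole dwords: broadcast into a v1 temporary, then split off the low byte (the v3b definition receives the unused upper bytes). */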
+ Temp tmp = bld.tmp(v1);
+ emit_wqm(ctx, bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), src, dpp_ctrl), tmp);
+ bld.pseudo(aco_opcode::p_split_vector, Definition(dst), bld.def(v3b), tmp);
+ } else if (instr->dest.ssa.bit_size == 16) {
+ Temp tmp = bld.tmp(v1);
+ emit_wqm(ctx, bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), src, dpp_ctrl), tmp);
+ bld.pseudo(aco_opcode::p_split_vector, Definition(dst), bld.def(v2b), tmp);
} else if (instr->dest.ssa.bit_size == 32) {
   if (ctx->program->chip_class >= GFX8)
      emit_wqm(ctx, bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), src, dpp_ctrl), dst);
   else
      emit_wqm(ctx, bld.ds(aco_opcode::ds_swizzle_b32, bld.def(v1), src, (1 << 15) | dpp_ctrl), dst);
}

if (instr->dest.ssa.bit_size == 1) {
   src = bld.vop2_e64(aco_opcode::v_cndmask_b32, bld.def(v1), Operand(0u), Operand((uint32_t)-1), src);
   src = bld.ds(aco_opcode::ds_swizzle_b32, bld.def(v1), src, (1 << 15) | dpp_ctrl);
   Temp tmp = bld.vopc(aco_opcode::v_cmp_lg_u32, bld.def(bld.lm), Operand(0u), src);
   emit_wqm(ctx, tmp, dst);
+ } else if (instr->dest.ssa.bit_size == 8) {
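+ /* As above: move the full dword, then extract the low byte/halfword. */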
+ Temp tmp = bld.tmp(v1);
+ emit_wqm(ctx, bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), src, dpp_ctrl), tmp);
+ bld.pseudo(aco_opcode::p_split_vector, Definition(dst), bld.def(v3b), tmp);
+ } else if (instr->dest.ssa.bit_size == 16) {
+ Temp tmp = bld.tmp(v1);
+ emit_wqm(ctx, bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), src, dpp_ctrl), tmp);
+ bld.pseudo(aco_opcode::p_split_vector, Definition(dst), bld.def(v2b), tmp);
} else if (instr->dest.ssa.bit_size == 32) {
   Temp tmp;
   if (ctx->program->chip_class >= GFX8)
      tmp = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), src, dpp_ctrl);
   else
      tmp = bld.ds(aco_opcode::ds_swizzle_b32, bld.def(v1), src, (1 << 15) | dpp_ctrl);
   emit_wqm(ctx, tmp, get_ssa_temp(ctx, &instr->dest.ssa));
}
break;
}
- case nir_intrinsic_shader_clock:
- bld.smem(aco_opcode::s_memtime, Definition(get_ssa_temp(ctx, &instr->dest.ssa)), false);
+ case nir_intrinsic_shader_clock: {
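+ /* Device scope requires s_memrealtime (constant-rate, consistent across shader engines); s_memtime counts shader-core clocks local to one SE. */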
+ aco_opcode opcode =
+ nir_intrinsic_memory_scope(instr) == NIR_SCOPE_DEVICE ?
+ aco_opcode::s_memrealtime : aco_opcode::s_memtime;
+ bld.smem(opcode, Definition(get_ssa_temp(ctx, &instr->dest.ssa)), false);
emit_split_vector(ctx, get_ssa_temp(ctx, &instr->dest.ssa), 2);
break;
+ }
case nir_intrinsic_load_vertex_id_zero_base: {
Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
bld.copy(Definition(dst), get_arg(ctx, ctx->args->ac.vertex_id));
   break;
}
if (instr->op == nir_texop_tg4) {
- if (has_offset) {
- opcode = aco_opcode::image_gather4_lz_o;
- if (has_compare)
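+ /* tg4 normally uses the _lz opcodes; the explicit LOD/bias forms below are needed e.g. for VK_AMD_texture_gather_bias_lod. */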
+ if (has_offset) { /* image_gather4_*_o */
+ if (has_compare) {
opcode = aco_opcode::image_gather4_c_lz_o;
+ if (has_lod)
+ opcode = aco_opcode::image_gather4_c_l_o;
+ if (has_bias)
+ opcode = aco_opcode::image_gather4_c_b_o;
+ } else {
+ opcode = aco_opcode::image_gather4_lz_o;
+ if (has_lod)
+ opcode = aco_opcode::image_gather4_l_o;
+ if (has_bias)
+ opcode = aco_opcode::image_gather4_b_o;
+ }
} else {
- opcode = aco_opcode::image_gather4_lz;
- if (has_compare)
+ if (has_compare) {
opcode = aco_opcode::image_gather4_c_lz;
+ if (has_lod)
+ opcode = aco_opcode::image_gather4_c_l;
+ if (has_bias)
+ opcode = aco_opcode::image_gather4_c_b;
+ } else {
+ opcode = aco_opcode::image_gather4_lz;
+ if (has_lod)
+ opcode = aco_opcode::image_gather4_l;
+ if (has_bias)
+ opcode = aco_opcode::image_gather4_b;
+ }
}
} else if (instr->op == nir_texop_lod) {
opcode = aco_opcode::image_get_lod;