emit_split_vector(ctx, dst, 2);
else
emit_extract_vector(ctx, tmp, 0, dst);
- } else if (vec.size() == 4) {
- Temp lo = bld.tmp(s2), hi = bld.tmp(s2);
- bld.pseudo(aco_opcode::p_split_vector, Definition(lo), Definition(hi), vec);
- hi = bld.pseudo(aco_opcode::p_extract_vector, bld.def(s1), hi, Operand(0u));
+ } else if (vec.size() == 3 || vec.size() == 4) {
+ Temp lo = bld.tmp(s2), hi;
+ if (vec.size() == 3) {
+ /* this can happen if we use VMEM for a uniform load */
+ hi = bld.tmp(s1);
+ bld.pseudo(aco_opcode::p_split_vector, Definition(lo), Definition(hi), vec);
+ } else {
+ hi = bld.tmp(s2);
+ bld.pseudo(aco_opcode::p_split_vector, Definition(lo), Definition(hi), vec);
+ hi = bld.pseudo(aco_opcode::p_extract_vector, bld.def(s1), hi, Operand(0u));
+ }
if (select != Temp())
hi = bld.sop2(aco_opcode::s_cselect_b32, bld.def(s1), hi, Operand(0u), bld.scc(select));
lo = bld.sop2(aco_opcode::s_lshr_b64, bld.def(s2), bld.def(s1, scc), lo, shift);
Temp sc = bld.vop3(aco_opcode::v_cubesc_f32, bld.def(v1), src[0], src[1], src[2]);
Temp tc = bld.vop3(aco_opcode::v_cubetc_f32, bld.def(v1), src[0], src[1], src[2]);
sc = bld.vop2(aco_opcode::v_add_f32, bld.def(v1),
- bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), sc, ma), Operand(0x3f000000u/*0.5*/));
+ Operand(0x3f000000u/*0.5*/),
+ bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), sc, ma));
tc = bld.vop2(aco_opcode::v_add_f32, bld.def(v1),
- bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), tc, ma), Operand(0x3f000000u/*0.5*/));
+ Operand(0x3f000000u/*0.5*/),
+ bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), tc, ma));
bld.pseudo(aco_opcode::p_create_vector, Definition(dst), sc, tc);
break;
}
unsigned vertex_byte_size = vtx_info->chan_byte_size * channels;
if (vtx_info->chan_byte_size != 4 && channels == 3)
return false;
- return (ctx->options->chip_class != GFX6 && ctx->options->chip_class != GFX10) ||
+ return (ctx->options->chip_class >= GFX7 && ctx->options->chip_class <= GFX9) ||
(offset % vertex_byte_size == 0 && stride % vertex_byte_size == 0);
}