fs_reg dw = offset(vec4_result, bld, (const_offset & 0xf) / 4);
switch (type_sz(dst.type)) {
case 2:
- shuffle_32bit_load_result_to_16bit_data(bld, dst, dw, 1);
+ shuffle_32bit_load_result_to_16bit_data(bld, dst, dw, 0, 1);
bld.MOV(dst, subscript(dw, dst.type, (const_offset / 2) & 1));
break;
case 4:
void shuffle_32bit_load_result_to_16bit_data(const brw::fs_builder &bld,
const fs_reg &dst,
const fs_reg &src,
+ uint32_t first_component, /* index of the first dst-typed component to read from src */
uint32_t components);
void shuffle_16bit_data_for_32bit_write(const brw::fs_builder &bld,
shuffle_32bit_load_result_to_16bit_data(bld,
retype(dest, BRW_REGISTER_TYPE_W),
retype(read_result, BRW_REGISTER_TYPE_D),
- num_components);
+ 0, num_components);
} else {
assert(num_components == 1);
/* scalar 16-bit are read using one byte_scattered_read message */
shuffle_32bit_load_result_to_16bit_data(const fs_builder &bld,
const fs_reg &dst,
const fs_reg &src,
+ uint32_t first_component,
uint32_t components)
{
assert(type_sz(src.type) == 4);
for (unsigned i = 0; i < components; i++) {
const fs_reg component_i =
- subscript(offset(src, bld, i / 2), dst.type, i % 2);
+ subscript(offset(src, bld, (first_component + i) / 2), dst.type,
+ (first_component + i) % 2);
bld.MOV(offset(tmp, bld, i % 2), component_i);