From: Iago Toral Quiroga Date: Fri, 22 Jan 2016 13:00:38 +0000 (+0100) Subject: i965/fs: add shuffle_32bit_load_result_to_64bit_data helper X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=50b7676dc46bae39c5e9b779828ef4fb2e1fbefc;p=mesa.git i965/fs: add shuffle_32bit_load_result_to_64bit_data helper There will be a few places where we need to shuffle the result of a 32-bit load into valid 64-bit data, so extract this logic into a separate helper that we can reuse. v2 (Curro): - Use subscript() instead of stride() - Assert on the input types rather than retyping. - Use offset() instead of horiz_offset(), drop the multiplier definition. - Don't use force_writemask_all. - Mark component_i as const. - Make the function name lower case. v3 (Curro): - Pass src and dst by reference. - Move to brw_fs_nir.cpp Reviewed-by: Kenneth Graunke Reviewed-by: Francisco Jerez --- diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index d4eb8fb7be4..286e7186d1f 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -535,3 +535,8 @@ private: bool brw_do_channel_expressions(struct exec_list *instructions); bool brw_do_vector_splitting(struct exec_list *instructions); + +void shuffle_32bit_load_result_to_64bit_data(const brw::fs_builder &bld, + const fs_reg &dst, + const fs_reg &src, + uint32_t components); diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 7458b73b1d4..584a0d6bd52 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -3980,3 +3980,56 @@ fs_visitor::nir_emit_jump(const fs_builder &bld, nir_jump_instr *instr) unreachable("unknown jump"); } } + +/** + * This helper takes the result of a load operation that reads 32-bit elements + * in this format: + * + * x x x x x x x x + * y y y y y y y y + * z z z z z z z z + * w w w w w w w w + * + * and shuffles the data to get this: + * + * x y x y x y x 
y
+ *   x y x y x y x y
+ *   z w z w z w z w
+ *   z w z w z w z w
+ *
+ * Which is exactly what we want if the load is reading 64-bit components
+ * like doubles, where x represents the low 32-bit of the x double component
+ * and y represents the high 32-bit of the x double component (likewise with
+ * z and w for double component y). The parameter @components represents
+ * the number of 64-bit components present in @src. This would typically be
+ * 2 at most, since we can only fit 2 double elements in the result of a
+ * vec4 load.
+ *
+ * Notice that @dst and @src can be the same register.
+ */
+void
+shuffle_32bit_load_result_to_64bit_data(const fs_builder &bld,
+                                        const fs_reg &dst,
+                                        const fs_reg &src,
+                                        uint32_t components)
+{
+   assert(type_sz(src.type) == 4); /* @src must be typed as 32-bit data */
+   assert(type_sz(dst.type) == 8); /* @dst must be typed as 64-bit data */
+
+   /* A temporary that we will use to shuffle the 32-bit data of each
+    * component in the vector into valid 64-bit data. We can't write directly
+    * to dst because dst can be (and would usually be) the same as src
+    * and in that case the first MOV in the loop below would overwrite the
+    * data read in the second MOV.
+    *
+    * The temporary is allocated once, with the 64-bit type of dst, and is
+    * reused for every component. For each 64-bit component i the loop below:
+    *
+    *  1. picks the 32-bit component at offset 2 * i of src and the one
+    *     right after it (the low and high halves of the 64-bit value, as
+    *     described in the function comment above),
+    *  2. writes them through the src.type subscripts 0 and 1 of tmp, and
+    *  3. only then copies the assembled tmp into component i of dst.
+    *
+    * The order of these three MOVs must be kept: both halves have to be
+    * read from src before anything is written to dst.
+    */
+   fs_reg tmp = bld.vgrf(dst.type);
+
+   for (unsigned i = 0; i < components; i++) {
+      const fs_reg component_i = offset(src, bld, 2 * i); /* first half */
+
+      bld.MOV(subscript(tmp, src.type, 0), component_i);
+      bld.MOV(subscript(tmp, src.type, 1), offset(component_i, bld, 1));
+
+      bld.MOV(offset(dst, bld, i), tmp); /* safe even when dst == src */
+   }
+}