From: Iago Toral Quiroga Date: Wed, 9 Mar 2016 13:12:43 +0000 (+0100) Subject: i965/fs: add a helper function to create double immediates X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=9e196e907ee87bff2b8c215df5e31a0cd1d1a322;p=mesa.git i965/fs: add a helper function to create double immediates Gen7 hardware does not support double immediates so these need to be moved in 32-bit chunks to a regular vgrf instead. Instead of doing this every time we need to create a DF immediate, create a helper function that does the right thing depending on the hardware generation. v2: - Define setup_imm_df() as an independent function (Curro) - Create a specific builder to get rid of some instruction field assignments (Curro). v3: - Get devinfo from builder (Kenneth) Signed-off-by: Samuel Iglesias Gonsálvez Reviewed-by: Kenneth Graunke --- diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 1f88f8fd026..ca977d4d343 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -512,3 +512,5 @@ void shuffle_64bit_data_for_32bit_write(const brw::fs_builder &bld, const fs_reg &dst, const fs_reg &src, uint32_t components); +fs_reg setup_imm_df(const brw::fs_builder &bld, + double v); diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 04ed42e6acd..b41fbf8bd48 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -4547,3 +4547,41 @@ shuffle_64bit_data_for_32bit_write(const fs_builder &bld, bld.MOV(offset(dst, bld, 2 * i + 1), subscript(component_i, dst.type, 1)); } } + +fs_reg +setup_imm_df(const fs_builder &bld, double v) +{ + const struct brw_device_info *devinfo = bld.shader->devinfo; + assert(devinfo->gen >= 7); + + if (devinfo->gen >= 8) + return brw_imm_df(v); + + /* gen7 does not support DF immediates, so we generate a 64-bit constant by + * writing the low 32-bit of the constant to suboffset 0 of a VGRF and + * the high 32-bit to suboffset 4 and then applying a stride of 0. + * + * Alternatively, we could also produce a normal VGRF (without stride 0) + * by writing to all the channels in the VGRF, however, that would hit the + * gen7 bug where we have to split writes that span more than 1 register + * into instructions with a width of 4 (otherwise the write to the second + * register written runs into an execmask hardware bug) which isn't very + * nice. + */ + union { + double d; + struct { + uint32_t i1; + uint32_t i2; + }; + } di; + + di.d = v; + + const fs_builder ubld = bld.exec_all().group(1, 0); + const fs_reg tmp = ubld.vgrf(BRW_REGISTER_TYPE_UD, 2); + ubld.MOV(tmp, brw_imm_ud(di.i1)); + ubld.MOV(horiz_offset(tmp, 1), brw_imm_ud(di.i2)); + + return component(retype(tmp, BRW_REGISTER_TYPE_DF), 0); +}