i965/vec4: add a helper function to create double immediates
author Iago Toral Quiroga <itoral@igalia.com>
Wed, 9 Mar 2016 15:37:33 +0000 (16:37 +0100)
committer Samuel Iglesias Gonsálvez <siglesias@igalia.com>
Tue, 3 Jan 2017 10:26:50 +0000 (11:26 +0100)
Gen7 hardware does not support double immediates, so these need
to be moved in 32-bit chunks to a regular VGRF. Rather than
doing this every time we need to create a DF immediate,
create a helper function that does the right thing depending
on the hardware generation.

v2 (Curro):
  - Use swizzle() and writemask() helpers and make tmp const.

v3 (Iago):
  - Adapt to changes in offset()

Signed-off-by: Samuel Iglesias Gonsálvez <siglesias@igalia.com>
Reviewed-by: Matt Turner <mattst88@gmail.com>
src/mesa/drivers/dri/i965/brw_vec4.h
src/mesa/drivers/dri/i965/brw_vec4_nir.cpp

index 396e15844fd95d6177d7759cb9c502b5bc746473..f9a76c5f35f83934458eb4fd96ccb1b1554a5187 100644 (file)
@@ -320,6 +320,8 @@ public:
    void emit_conversion_to_double(dst_reg dst, src_reg src, bool saturate,
                                   brw_reg_type single_type);
 
+   src_reg setup_imm_df(double v); /* DF immediate, or a VGRF copy on gen7 */
+
    virtual void emit_nir_code();
    virtual void nir_setup_uniforms();
    virtual void nir_setup_system_value_intrinsic(nir_intrinsic_instr *instr);
index 8c9b98347bfa086f845b3c128af4b6ecdb506a42..312f30bff38c6770c30af5c8f3a291f037ce5a72 100644 (file)
@@ -1109,6 +1109,44 @@ vec4_visitor::emit_conversion_to_double(dst_reg dst, src_reg src,
    inst->saturate = saturate;
 }
 
+src_reg
+vec4_visitor::setup_imm_df(double v)
+{
+   assert(devinfo->gen >= 7);
+
+   if (devinfo->gen >= 8)
+      return brw_imm_df(v);
+
+   /* gen7 does not support DF immediates */
+   union {
+      double d;
+      struct {
+         uint32_t i1;
+         uint32_t i2;
+      };
+   } di;
+
+   di.d = v;
+
+   /* Write the low 32-bit of the constant to the X:UD channel and the
+    * high 32-bit to the Y:UD channel to build the constant in a VGRF.
+    * We have to do this twice (offset 0 and offset 1), since a DF VGRF takes
+    * two SIMD8 registers in SIMD4x2 execution. Finally, return a swizzle
+    * XXXX so any access to the VGRF only reads the constant data in these
+    * channels.
+    */
+   const dst_reg tmp =
+      retype(dst_reg(VGRF, alloc.allocate(2)), BRW_REGISTER_TYPE_UD);
+   for (int n = 0; n < 2; n++) {
+      emit(MOV(writemask(offset(tmp, 8, n), WRITEMASK_X), brw_imm_ud(di.i1)))
+         ->force_writemask_all = true;
+      emit(MOV(writemask(offset(tmp, 8, n), WRITEMASK_Y), brw_imm_ud(di.i2)))
+         ->force_writemask_all = true;
+   }
+
+   return swizzle(src_reg(retype(tmp, BRW_REGISTER_TYPE_DF)), BRW_SWIZZLE_XXXX);
+}
+
 void
 vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
 {