i965/fs: Define new shader opcode to set rounding modes
author Alejandro Piñeiro <apinheiro@igalia.com>
Sat, 1 Jul 2017 06:12:59 +0000 (08:12 +0200)
committer Jose Maria Casanova Crespo <jmcasanova@igalia.com>
Wed, 6 Dec 2017 07:57:18 +0000 (08:57 +0100)
Although it is possible to emit the rounding mode changes directly as AND/OR
instructions in brw_fs_nir, having a specific opcode makes it easier to
remove duplicate settings later.

v2: (Curro)
  - Set thread control to 'switch' when using the control register
  - Use a single SHADER_OPCODE_RND_MODE opcode taking an immediate
    with the rounding mode.
  - Avoid magic numbers when setting the rounding mode field of the control register.
v3: (Curro)
  - Remove redundant and add missing whitespace lines.
  - Match printing instruction to IR opcode "rnd_mode"

v4: (Topi Pohjolainen)
  - Fix code style.

Signed-off-by: Alejandro Piñeiro <apinheiro@igalia.com>
Signed-off-by: Jose Maria Casanova Crespo <jmcasanova@igalia.com>
Reviewed-by: Francisco Jerez <currojerez@riseup.net>
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
src/intel/compiler/brw_eu.h
src/intel/compiler/brw_eu_defines.h
src/intel/compiler/brw_eu_emit.c
src/intel/compiler/brw_fs_generator.cpp
src/intel/compiler/brw_shader.cpp

index b5a206b3f100589879fb23b76603d4704e99e12f..343dcd867db63dc7149cd1552555c29cdf9ad1b2 100644 (file)
@@ -510,6 +510,10 @@ brw_broadcast(struct brw_codegen *p,
               struct brw_reg src,
               struct brw_reg idx);
 
+void
+brw_rounding_mode(struct brw_codegen *p,
+                  enum brw_rnd_mode mode);
+
 /***********************************************************************
  * brw_eu_util.c:
  */
index 291dd361a29727564e8f915145f0d60ddf6755db..8a8f36cbc114ea4f33f492a249dfda149f346de1 100644 (file)
@@ -400,6 +400,8 @@ enum opcode {
    SHADER_OPCODE_TYPED_SURFACE_WRITE,
    SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL,
 
+   SHADER_OPCODE_RND_MODE,
+
    SHADER_OPCODE_MEMORY_FENCE,
 
    SHADER_OPCODE_GEN4_SCRATCH_READ,
@@ -1238,4 +1240,18 @@ enum brw_message_target {
 /* R0 */
 # define GEN7_GS_PAYLOAD_INSTANCE_ID_SHIFT             27
 
+/* CR0.0[5:4] Floating-Point Rounding Modes (enum values are unshifted)
+ *  Skylake PRM, Volume 7 Part 1, "Control Register", page 756
+ */
+
+#define BRW_CR0_RND_MODE_MASK     0x30   /* bits 5:4 of cr0.0 */
+#define BRW_CR0_RND_MODE_SHIFT    4      /* position of the field's low bit */
+
+enum PACKED brw_rnd_mode {
+   BRW_RND_MODE_RTNE = 0,  /* Round to Nearest or Even */
+   BRW_RND_MODE_RU = 1,    /* Round Up, toward +inf */
+   BRW_RND_MODE_RD = 2,    /* Round Down, toward -inf */
+   BRW_RND_MODE_RTZ = 3,   /* Round Toward Zero */
+};
+
 #endif /* BRW_EU_DEFINES_H */
index dc14023b4845e43fc4e5c277804ae2ac2dd53a8c..ca97ff7325e95b906b4678399bedda9d122f0162 100644 (file)
@@ -3589,3 +3589,36 @@ brw_WAIT(struct brw_codegen *p)
    brw_inst_set_exec_size(devinfo, insn, BRW_EXECUTE_1);
    brw_inst_set_mask_control(devinfo, insn, BRW_MASK_DISABLE);
 }
+
+/**
+ * Changes the floating point rounding mode by updating the control register
+ * field cr0.0[5:4] to RTNE (00), RU (01), RD (10) or RTZ (11): an AND clears
+ * the field unless every bit gets set (RTZ), then an OR sets the new bits
+ * unless there are none (RTNE). Only RTNE and RTZ rounding are enabled at nir.
+ */
+void
+brw_rounding_mode(struct brw_codegen *p,
+                  enum brw_rnd_mode mode)
+{
+   const unsigned bits = mode << BRW_CR0_RND_MODE_SHIFT;
+
+   if (bits != BRW_CR0_RND_MODE_MASK) {
+      brw_inst *inst = brw_AND(p, brw_cr0_reg(0), brw_cr0_reg(0),
+                               brw_imm_ud(~BRW_CR0_RND_MODE_MASK));
+
+      /* From the Skylake PRM, Volume 7, page 760:
+       *  "Implementation Restriction on Register Access: When the control
+       *   register is used as an explicit source and/or destination, hardware
+       *   does not ensure execution pipeline coherency. Software must set the
+       *   thread control field to ‘switch’ for an instruction that uses
+       *   control register as an explicit operand."
+       */
+      brw_inst_set_thread_control(p->devinfo, inst, BRW_THREAD_SWITCH);
+    }
+
+   if (bits) {
+      brw_inst *inst = brw_OR(p, brw_cr0_reg(0), brw_cr0_reg(0),
+                              brw_imm_ud(bits));
+      brw_inst_set_thread_control(p->devinfo, inst, BRW_THREAD_SWITCH);
+   }
+}
index 4f90ec9dfff6c8de5f3ecd5e06e1a3257adaea6d..a5c39cc956b5348e65ccd7b08eea37b9dfc0eb4a 100644 (file)
@@ -2176,6 +2176,11 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
          brw_DIM(p, dst, retype(src[0], BRW_REGISTER_TYPE_F));
          break;
 
+      case SHADER_OPCODE_RND_MODE:
+         assert(src[0].file == BRW_IMMEDIATE_VALUE);
+         brw_rounding_mode(p, (brw_rnd_mode) src[0].d);
+         break;
+
       default:
          unreachable("Unsupported opcode");
 
index aa9e5f3d284d439eb297ea2b6a1088e27a7a92c5..d7d7616cf4f9a82f73dbf09bc35f673f141bd005 100644 (file)
@@ -482,6 +482,9 @@ brw_instruction_name(const struct gen_device_info *devinfo, enum opcode op)
       return "tes_add_indirect_urb_offset";
    case TES_OPCODE_GET_PRIMITIVE_ID:
       return "tes_get_primitive_id";
+
+   case SHADER_OPCODE_RND_MODE:
+      return "rnd_mode";
    }
 
    unreachable("not reached");
@@ -974,6 +977,7 @@ backend_instruction::has_side_effects() const
    case SHADER_OPCODE_BARRIER:
    case TCS_OPCODE_URB_WRITE:
    case TCS_OPCODE_RELEASE_INPUT:
+   case SHADER_OPCODE_RND_MODE:
       return true;
    default:
       return eot;