i965/nir/vec4: Implement single-element "mov" operations
[mesa.git] / src / mesa / drivers / dri / i965 / brw_reg.h
index 3ee3543d4e428b33ef1c1f68519b2f010fdfb264..31806f769bd48da85c48092f93a916f220ed6879 100644 (file)
@@ -1,6 +1,6 @@
 /*
  Copyright (C) Intel Corp.  2006.  All Rights Reserved.
- Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ Intel funded Tungsten Graphics to
  develop this 3D driver.
 
  Permission is hereby granted, free of charge, to any person obtaining
@@ -26,7 +26,7 @@
  **********************************************************************/
  /*
   * Authors:
-  *   Keith Whitwell <keith@tungstengraphics.com>
+  *   Keith Whitwell <keithw@vmware.com>
   */
 
 /** @file brw_reg.h
@@ -43,6 +43,9 @@
 #define BRW_REG_H
 
 #include <stdbool.h>
+#include "main/imports.h"
+#include "main/compiler.h"
+#include "main/macros.h"
 #include "program/prog_instruction.h"
 #include "brw_defines.h"
 
@@ -50,6 +53,8 @@
 extern "C" {
 #endif
 
+struct brw_device_info;
+
 /** Number of general purpose registers (VS, WM, etc) */
 #define BRW_MAX_GRF 128
 
@@ -77,10 +82,12 @@ extern "C" {
 #define BRW_SWIZZLE_ZZZZ      BRW_SWIZZLE4(2,2,2,2)
 #define BRW_SWIZZLE_WWWW      BRW_SWIZZLE4(3,3,3,3)
 #define BRW_SWIZZLE_XYXY      BRW_SWIZZLE4(0,1,0,1)
+#define BRW_SWIZZLE_YZXW      BRW_SWIZZLE4(1,2,0,3)
+#define BRW_SWIZZLE_ZXYW      BRW_SWIZZLE4(2,0,1,3)
 #define BRW_SWIZZLE_ZWZW      BRW_SWIZZLE4(2,3,2,3)
 
 static inline bool
-brw_is_single_value_swizzle(int swiz)
+brw_is_single_value_swizzle(unsigned swiz)
 {
    return (swiz == BRW_SWIZZLE_XXXX ||
            swiz == BRW_SWIZZLE_YYYY ||
@@ -88,6 +95,133 @@ brw_is_single_value_swizzle(int swiz)
            swiz == BRW_SWIZZLE_WWWW);
 }
 
+/**
+ * Compose two swizzles.  Channel i of the returned swizzle is the component
+ * of \p swz1 selected by channel i of \p swz0, i.e. \p swz1 is applied first
+ * and \p swz0 is applied to its result.  The argument order therefore mirrors
+ * ordinary function composition.
+ */
+static inline unsigned
+brw_compose_swizzle(unsigned swz0, unsigned swz1)
+{
+   unsigned chan[4];
+
+   for (unsigned i = 0; i < 4; i++) {
+      /* The outer swizzle picks which channel of the inner one to read. */
+      const unsigned outer = BRW_GET_SWZ(swz0, i);
+
+      chan[i] = BRW_GET_SWZ(swz1, outer);
+   }
+
+   return BRW_SWIZZLE4(chan[0], chan[1], chan[2], chan[3]);
+}
+
+/**
+ * Shuffle the bits of \p mask through swizzle \p swz (the image of the mask
+ * under the swizzle): bit i of the result is set exactly when the bit of
+ * \p mask selected by channel i of \p swz is set.
+ */
+static inline unsigned
+brw_apply_swizzle_to_mask(unsigned swz, unsigned mask)
+{
+   unsigned image = 0;
+
+   for (unsigned chan = 0; chan < 4; chan++) {
+      const unsigned src = BRW_GET_SWZ(swz, chan);
+
+      /* Move bit 'src' of the input mask into position 'chan'. */
+      image |= ((mask >> src) & 1u) << chan;
+   }
+
+   return image;
+}
+
+/**
+ * Shuffle the bits of \p mask through the inverse of swizzle \p swz (the
+ * preimage of the mask): for every bit i set in \p mask, the bit named by
+ * channel i of \p swz is set in the result.  This answers the question
+ * "which components of a swizzled source are read under this writemask?".
+ */
+static inline unsigned
+brw_apply_inv_swizzle_to_mask(unsigned swz, unsigned mask)
+{
+   unsigned preimage = 0;
+
+   for (unsigned chan = 0; chan < 4; chan++) {
+      /* Channels absent from the mask contribute nothing. */
+      if (!(mask & (1 << chan)))
+         continue;
+
+      preimage |= 1 << BRW_GET_SWZ(swz, chan);
+   }
+
+   return preimage;
+}
+
+/**
+ * Build an identity swizzle restricted to the channels enabled in \p mask.
+ * Enabled channels map to themselves; a disabled channel repeats the closest
+ * enabled channel below it, or the lowest enabled channel when there is none
+ * below.  For a non-zero \p mask the result therefore only references
+ * enabled channels, so that for any instruction OP:
+ *
+ *    brw_OP(p, brw_writemask(dst, mask),
+ *           brw_swizzle(src, brw_swizzle_for_mask(mask)));
+ *
+ * is equivalent to the unswizzled form:
+ *
+ *    brw_OP(p, brw_writemask(dst, mask), src);
+ */
+static inline unsigned
+brw_swizzle_for_mask(unsigned mask)
+{
+   unsigned chan[4];
+   unsigned fallback = 0;
+
+   /* Seed the fallback with the lowest enabled channel, if any. */
+   if (mask)
+      fallback = ffs(mask) - 1;
+
+   for (unsigned i = 0; i < 4; i++) {
+      if (mask & (1 << i))
+         fallback = i;
+
+      chan[i] = fallback;
+   }
+
+   return BRW_SWIZZLE4(chan[0], chan[1], chan[2], chan[3]);
+}
+
+/**
+ * Build an identity swizzle covering only the first \p n components of a
+ * vector.  Referencing the unused channels of a vec4 would tell optimization
+ * passes that those channels are live, so they are deliberately left out.
+ */
+static inline unsigned
+brw_swizzle_for_size(unsigned n)
+{
+   /* Mask with the low n bits set, one bit per used component. */
+   const unsigned used_channels = (1 << n) - 1;
+
+   return brw_swizzle_for_mask(used_channels);
+}
+
+/**
+ * Converse of brw_swizzle_for_mask(): return the mask of components that
+ * swizzle \p swz accesses, computed as the preimage of an all-ones mask.
+ */
+static inline unsigned
+brw_mask_for_swizzle(unsigned swz)
+{
+   const unsigned every_channel = ~0;
+
+   return brw_apply_inv_swizzle_to_mask(swz, every_channel);
+}
+
+enum PACKED brw_reg_type {
+   BRW_REGISTER_TYPE_UD = 0,
+   BRW_REGISTER_TYPE_D  = 1,
+   BRW_REGISTER_TYPE_UW = 2,
+   BRW_REGISTER_TYPE_W  = 3,
+   BRW_REGISTER_TYPE_F  = 4,
+
+   /* The following two types are valid for non-immediates only. */
+   BRW_REGISTER_TYPE_UB = 5,
+   BRW_REGISTER_TYPE_B  = 6,
+
+   /* The following three types are valid for immediates only. */
+   BRW_REGISTER_TYPE_UV = 7,
+   BRW_REGISTER_TYPE_V  = 8,
+   BRW_REGISTER_TYPE_VF = 9,
+
+   /* Gen7+ (no immediates until Gen8+). */
+   BRW_REGISTER_TYPE_DF = 10,
+
+   /* Gen8+. */
+   BRW_REGISTER_TYPE_HF = 11,
+   BRW_REGISTER_TYPE_UQ = 12,
+   BRW_REGISTER_TYPE_Q  = 13,
+};
+
+unsigned brw_reg_type_to_hw_type(const struct brw_device_info *devinfo,
+                                 enum brw_reg_type type, unsigned file);
+const char *brw_reg_type_letters(unsigned brw_reg_type);
+
 #define REG_SIZE (8*4)
 
 /* These aren't hardware structs, just something useful for us to pass around:
@@ -97,7 +231,7 @@ brw_is_single_value_swizzle(int swiz)
  * or "structure of array" form:
  */
 struct brw_reg {
-   unsigned type:4;
+   enum brw_reg_type type:4;
    unsigned file:2;
    unsigned nr:8;
    unsigned subnr:5;              /* :1 in align16 */
@@ -131,15 +265,17 @@ struct brw_indirect {
 };
 
 
-static inline int
+static inline unsigned
 type_sz(unsigned type)
 {
    switch(type) {
+   case BRW_REGISTER_TYPE_UQ:
+   case BRW_REGISTER_TYPE_Q:
+      return 8;
    case BRW_REGISTER_TYPE_UD:
    case BRW_REGISTER_TYPE_D:
    case BRW_REGISTER_TYPE_F:
       return 4;
-   case BRW_REGISTER_TYPE_HF:
    case BRW_REGISTER_TYPE_UW:
    case BRW_REGISTER_TYPE_W:
       return 2;
@@ -151,11 +287,40 @@ type_sz(unsigned type)
    }
 }
 
+/**
+ * Report whether register type \p type carries a sign.  The floating-point
+ * types (F, VF, DF, HF) are classified as signed alongside the signed
+ * integer types.  Any value that is not a known BRW_REGISTER_TYPE_x is
+ * treated as unreachable.
+ */
+static inline bool
+type_is_signed(unsigned type)
+{
+   switch (type) {
+   case BRW_REGISTER_TYPE_UD:
+   case BRW_REGISTER_TYPE_UW:
+   case BRW_REGISTER_TYPE_UB:
+   case BRW_REGISTER_TYPE_UV:
+   case BRW_REGISTER_TYPE_UQ:
+      return false;
+
+   case BRW_REGISTER_TYPE_D:
+   case BRW_REGISTER_TYPE_W:
+   case BRW_REGISTER_TYPE_B:
+   case BRW_REGISTER_TYPE_V:
+   case BRW_REGISTER_TYPE_Q:
+   case BRW_REGISTER_TYPE_F:
+   case BRW_REGISTER_TYPE_VF:
+   case BRW_REGISTER_TYPE_DF:
+   case BRW_REGISTER_TYPE_HF:
+      return true;
+
+   default:
+      unreachable("not reached");
+   }
+}
+
 /**
  * Construct a brw_reg.
  * \param file      one of the BRW_x_REGISTER_FILE values
  * \param nr        register number/index
  * \param subnr     register sub number
+ * \param negate    register negate modifier
+ * \param abs       register abs modifier
  * \param type      one of BRW_REGISTER_TYPE_x
  * \param vstride   one of BRW_VERTICAL_STRIDE_x
  * \param width     one of BRW_WIDTH_x
@@ -167,7 +332,9 @@ static inline struct brw_reg
 brw_reg(unsigned file,
         unsigned nr,
         unsigned subnr,
-        unsigned type,
+        unsigned negate,
+        unsigned abs,
+        enum brw_reg_type type,
         unsigned vstride,
         unsigned width,
         unsigned hstride,
@@ -186,8 +353,8 @@ brw_reg(unsigned file,
    reg.file = file;
    reg.nr = nr;
    reg.subnr = subnr * type_sz(type);
-   reg.negate = 0;
-   reg.abs = 0;
+   reg.negate = negate;
+   reg.abs = abs;
    reg.vstride = vstride;
    reg.width = width;
    reg.hstride = hstride;
@@ -214,6 +381,8 @@ brw_vec16_reg(unsigned file, unsigned nr, unsigned subnr)
    return brw_reg(file,
                   nr,
                   subnr,
+                  0,
+                  0,
                   BRW_REGISTER_TYPE_F,
                   BRW_VERTICAL_STRIDE_16,
                   BRW_WIDTH_16,
@@ -229,6 +398,8 @@ brw_vec8_reg(unsigned file, unsigned nr, unsigned subnr)
    return brw_reg(file,
                   nr,
                   subnr,
+                  0,
+                  0,
                   BRW_REGISTER_TYPE_F,
                   BRW_VERTICAL_STRIDE_8,
                   BRW_WIDTH_8,
@@ -244,6 +415,8 @@ brw_vec4_reg(unsigned file, unsigned nr, unsigned subnr)
    return brw_reg(file,
                   nr,
                   subnr,
+                  0,
+                  0,
                   BRW_REGISTER_TYPE_F,
                   BRW_VERTICAL_STRIDE_4,
                   BRW_WIDTH_4,
@@ -259,6 +432,8 @@ brw_vec2_reg(unsigned file, unsigned nr, unsigned subnr)
    return brw_reg(file,
                   nr,
                   subnr,
+                  0,
+                  0,
                   BRW_REGISTER_TYPE_F,
                   BRW_VERTICAL_STRIDE_2,
                   BRW_WIDTH_2,
@@ -274,6 +449,8 @@ brw_vec1_reg(unsigned file, unsigned nr, unsigned subnr)
    return brw_reg(file,
                   nr,
                   subnr,
+                  0,
+                  0,
                   BRW_REGISTER_TYPE_F,
                   BRW_VERTICAL_STRIDE_0,
                   BRW_WIDTH_1,
@@ -282,14 +459,38 @@ brw_vec1_reg(unsigned file, unsigned nr, unsigned subnr)
                   WRITEMASK_X);
 }
 
+/**
+ * Construct a float register region of \p width channels by dispatching to
+ * the matching brw_vecN_reg() helper.  Only widths 1, 2, 4, 8 and 16 are
+ * accepted; any other value is treated as unreachable.
+ */
+static inline struct brw_reg
+brw_vecn_reg(unsigned width, unsigned file, unsigned nr, unsigned subnr)
+{
+   if (width == 1)
+      return brw_vec1_reg(file, nr, subnr);
+
+   if (width == 2)
+      return brw_vec2_reg(file, nr, subnr);
+
+   if (width == 4)
+      return brw_vec4_reg(file, nr, subnr);
+
+   if (width == 8)
+      return brw_vec8_reg(file, nr, subnr);
+
+   if (width == 16)
+      return brw_vec16_reg(file, nr, subnr);
+
+   unreachable("Invalid register width");
+}
 
 static inline struct brw_reg
-retype(struct brw_reg reg, unsigned type)
+retype(struct brw_reg reg, enum brw_reg_type type)
 {
+   /* reg is passed by value: only the returned copy carries the new type. */
    reg.type = type;
    return reg;
 }
 
+/** Return \p reg unchanged. */
+static inline struct brw_reg
+firsthalf(struct brw_reg reg)
+{
+   const struct brw_reg first = reg;
+
+   return first;
+}
+
 static inline struct brw_reg
 sechalf(struct brw_reg reg)
 {
@@ -346,9 +547,11 @@ brw_uw1_reg(unsigned file, unsigned nr, unsigned subnr)
 }
 
 static inline struct brw_reg
-brw_imm_reg(unsigned type)
+brw_imm_reg(enum brw_reg_type type)
 {
    return brw_reg(BRW_IMMEDIATE_VALUE,
+                  0,
+                  0,
                   0,
                   0,
                   type,
@@ -432,19 +635,38 @@ brw_imm_vf(unsigned v)
    return imm;
 }
 
-#define VF_ZERO 0x0
-#define VF_ONE  0x30
-#define VF_NEG  (1<<7)
+/**
+ * Convert an integer into a "restricted" 8-bit float, used in vector
+ * immediates.  The 8-bit floating point format has a sign bit, an
+ * excess-3 3-bit exponent, and a 4-bit mantissa.  All integer values
+ * from -31 to 31 can be represented exactly; magnitudes outside that range
+ * are not supported and trip the assertion below.
+ *
+ * \param x  integer in the range [-31, 31]
+ * \return   the 8-bit encoding (sign in bit 7, exponent in bits 6:4,
+ *           mantissa in bits 3:0); zero encodes as 0
+ */
+static inline uint8_t
+int_to_float8(int x)
+{
+   if (x == 0) {
+      return 0;
+   } else if (x < 0) {
+      /* Set the sign bit and encode the magnitude. */
+      return 1 << 7 | int_to_float8(-x);
+   } else {
+      const unsigned exponent = _mesa_logbase2(x);
+
+      /* Validate before shifting: for exponent > 4 the unsigned expression
+       * (4 - exponent) wraps around and the shift below would be undefined.
+       */
+      assert(exponent <= 4);
+
+      const unsigned mantissa = (x - (1 << exponent)) << (4 - exponent);
+      return (exponent + 3) << 4 | mantissa;
+   }
+}
 
+/**
+ * Construct a floating-point packed vector immediate from its integer
+ * values.  Each argument is encoded with int_to_float8() and placed in its
+ * own byte, \p v0 in the least significant one. \sa int_to_float8()
+ */
 static inline struct brw_reg
-brw_imm_vf4(unsigned v0, unsigned v1, unsigned v2, unsigned v3)
+brw_imm_vf4(int v0, int v1, int v2, int v3)
 {
-   struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF);
-   imm.vstride = BRW_VERTICAL_STRIDE_0;
-   imm.width = BRW_WIDTH_4;
-   imm.hstride = BRW_HORIZONTAL_STRIDE_1;
-   imm.dw1.ud = ((v0 << 0) | (v1 << 8) | (v2 << 16) | (v3 << 24));
-   return imm;
+   /* Widen each byte to unsigned before shifting.  A uint8_t promotes to
+    * int, and shifting an encoding with the sign bit (0x80) set left by 24
+    * would overflow int, which is undefined behaviour.
+    */
+   return brw_imm_vf(((unsigned)int_to_float8(v0) << 0) |
+                     ((unsigned)int_to_float8(v1) << 8) |
+                     ((unsigned)int_to_float8(v2) << 16) |
+                     ((unsigned)int_to_float8(v3) << 24));
 }
 
 
@@ -482,6 +704,13 @@ brw_vec8_grf(unsigned nr, unsigned subnr)
    return brw_vec8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
 }
 
+/**
+ * Construct a float[16] region in the general-purpose register file.
+ * \param nr     register number
+ * \param subnr  register sub number
+ */
+static inline struct brw_reg
+brw_vec16_grf(unsigned nr, unsigned subnr)
+{
+   const unsigned file = BRW_GENERAL_REGISTER_FILE;
+
+   return brw_vec16_reg(file, nr, subnr);
+}
+
 
 static inline struct brw_reg
 brw_uw8_grf(unsigned nr, unsigned subnr)
@@ -503,6 +732,12 @@ brw_null_reg(void)
    return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_NULL, 0);
 }
 
+/**
+ * Construct a null register (BRW_ARF_NULL in the architecture register
+ * file) of the given \p width.  The width must be one accepted by
+ * brw_vecn_reg().
+ */
+static inline struct brw_reg
+brw_null_vec(unsigned width)
+{
+   const unsigned file = BRW_ARCHITECTURE_REGISTER_FILE;
+
+   return brw_vecn_reg(width, file, BRW_ARF_NULL, 0);
+}
+
 static inline struct brw_reg
 brw_address_reg(unsigned subnr)
 {
@@ -519,6 +754,8 @@ brw_ip_reg(void)
    return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE,
                   BRW_ARF_IP,
                   0,
+                  0,
+                  0,
                   BRW_REGISTER_TYPE_UD,
                   BRW_VERTICAL_STRIDE_4, /* ? */
                   BRW_WIDTH_1,
@@ -528,18 +765,13 @@ brw_ip_reg(void)
 }
 
 static inline struct brw_reg
-brw_acc_reg(void)
-{
-   return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_ACCUMULATOR, 0);
-}
-
-static inline struct brw_reg
-brw_notification_1_reg(void)
+brw_notification_reg(void)
 {
-
    return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE,
                   BRW_ARF_NOTIFICATION_COUNT,
-                  1,
+                  0,
+                  0,
+                  0,
                   BRW_REGISTER_TYPE_UD,
                   BRW_VERTICAL_STRIDE_0,
                   BRW_WIDTH_1,
@@ -548,6 +780,12 @@ brw_notification_1_reg(void)
                   WRITEMASK_X);
 }
 
+/**
+ * Construct an accumulator register (BRW_ARF_ACCUMULATOR in the architecture
+ * register file) of the given \p width.  The width must be one accepted by
+ * brw_vecn_reg().
+ */
+static inline struct brw_reg
+brw_acc_reg(unsigned width)
+{
+   const unsigned file = BRW_ARCHITECTURE_REGISTER_FILE;
+
+   return brw_vecn_reg(width, file, BRW_ARF_ACCUMULATOR, 0);
+}
 
 static inline struct brw_reg
 brw_flag_reg(int reg, int subreg)
@@ -556,7 +794,11 @@ brw_flag_reg(int reg, int subreg)
                       BRW_ARF_FLAG + reg, subreg);
 }
 
-
+/**
+ * Return the mask register present in Gen4-5, or the related register present
+ * in Gen7.5 and later hardware referred to as "channel enable" register in
+ * the documentation.
+ */
 static inline struct brw_reg
 brw_mask_reg(unsigned subnr)
 {
@@ -570,6 +812,12 @@ brw_message_reg(unsigned nr)
    return brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, nr, 0);
 }
 
+/**
+ * Construct a message register region of \p width channels and retype it to
+ * BRW_REGISTER_TYPE_UD.
+ * \param width  channel count, one accepted by brw_vecn_reg()
+ * \param nr     message register number
+ * \param subnr  register sub number
+ */
+static inline struct brw_reg
+brw_uvec_mrf(unsigned width, unsigned nr, unsigned subnr)
+{
+   const struct brw_reg mrf =
+      brw_vecn_reg(width, BRW_MESSAGE_REGISTER_FILE, nr, subnr);
+
+   return retype(mrf, BRW_REGISTER_TYPE_UD);
+}
 
 /* This is almost always called with a numeric constant argument, so
  * make things easy to evaluate at compile time:
@@ -597,6 +845,27 @@ stride(struct brw_reg reg, unsigned vstride, unsigned width, unsigned hstride)
    return reg;
 }
 
+/**
+ * Multiply the vertical and horizontal stride of a register by the given
+ * factor \a s, which must be zero or a power of two.  Strides that are
+ * already zero are left untouched.  A factor of zero yields the region
+ * produced by stride(reg, 0, 1, 0).
+ */
+static inline struct brw_reg
+spread(struct brw_reg reg, unsigned s)
+{
+   /* Zero factor: hand back the region with all strides cleared. */
+   if (!s)
+      return stride(reg, 0, 1, 0);
+
+   assert(_mesa_is_pow_two(s));
+
+   if (reg.vstride)
+      reg.vstride += cvt(s) - 1;
+
+   if (reg.hstride)
+      reg.hstride += cvt(s) - 1;
+
+   return reg;
+}
 
 static inline struct brw_reg
 vec16(struct brw_reg reg)
@@ -653,10 +922,8 @@ brw_swizzle(struct brw_reg reg, unsigned x, unsigned y, unsigned z, unsigned w)
 {
    assert(reg.file != BRW_IMMEDIATE_VALUE);
 
-   reg.dw1.bits.swizzle = BRW_SWIZZLE4(BRW_GET_SWZ(reg.dw1.bits.swizzle, x),
-                                       BRW_GET_SWZ(reg.dw1.bits.swizzle, y),
-                                       BRW_GET_SWZ(reg.dw1.bits.swizzle, z),
-                                       BRW_GET_SWZ(reg.dw1.bits.swizzle, w));
+   reg.dw1.bits.swizzle = brw_compose_swizzle(BRW_SWIZZLE4(x, y, z, w),
+                                              reg.dw1.bits.swizzle);
    return reg;
 }
 
@@ -683,6 +950,12 @@ brw_set_writemask(struct brw_reg reg, unsigned mask)
    return reg;
 }
 
+/** Return a writemask with the low \p n bits set, one per enabled channel. */
+static inline unsigned
+brw_writemask_for_size(unsigned n)
+{
+   const unsigned first_n_channels = (1 << n) - 1;
+
+   return first_n_channels;
+}
+
 static inline struct brw_reg
 negate(struct brw_reg reg)
 {
@@ -779,6 +1052,23 @@ brw_indirect(unsigned addr_subnr, int offset)
    return ptr;
 }
 
+/**
+ * Check whether the region of \p reg is exactly <v;w,h>, i.e. its vertical
+ * stride, width and horizontal stride fields equal \p v, \p w and \p h.
+ */
+static inline bool
+region_matches(struct brw_reg reg, enum brw_vertical_stride v,
+               enum brw_width w, enum brw_horizontal_stride h)
+{
+   if (reg.vstride != v)
+      return false;
+
+   if (reg.width != w)
+      return false;
+
+   return reg.hstride == h;
+}
+
+/* True when reg has zero vertical stride, width one and zero horizontal
+ * stride.
+ */
+#define has_scalar_region(reg) \
+   (region_matches((reg), BRW_VERTICAL_STRIDE_0, BRW_WIDTH_1, \
+                   BRW_HORIZONTAL_STRIDE_0))
+
+/* brw_packed_float.c */
+int brw_float_to_vf(float f);
+float brw_vf_to_float(unsigned char vf);
+
 #ifdef __cplusplus
 }
 #endif