i965: Move MRF register asserts out of brw_reg.h
[mesa.git] / src / mesa / drivers / dri / i965 / brw_reg.h
index 9673210666ee24b696041389e406b6f68a5fd0e4..06d9269f4d8c11c63aeaba4450f1f055c2c9454f 100644 (file)
@@ -43,6 +43,9 @@
 #define BRW_REG_H
 
 #include <stdbool.h>
+#include "main/imports.h"
+#include "main/compiler.h"
+#include "main/macros.h"
 #include "program/prog_instruction.h"
 #include "brw_defines.h"
 
@@ -50,6 +53,8 @@
 extern "C" {
 #endif
 
+struct brw_device_info;
+
 /** Number of general purpose registers (VS, WM, etc) */
 #define BRW_MAX_GRF 128
 
@@ -77,10 +82,12 @@ extern "C" {
 #define BRW_SWIZZLE_ZZZZ      BRW_SWIZZLE4(2,2,2,2)
 #define BRW_SWIZZLE_WWWW      BRW_SWIZZLE4(3,3,3,3)
 #define BRW_SWIZZLE_XYXY      BRW_SWIZZLE4(0,1,0,1)
+#define BRW_SWIZZLE_YZXW      BRW_SWIZZLE4(1,2,0,3)
+#define BRW_SWIZZLE_ZXYW      BRW_SWIZZLE4(2,0,1,3)
 #define BRW_SWIZZLE_ZWZW      BRW_SWIZZLE4(2,3,2,3)
 
 static inline bool
-brw_is_single_value_swizzle(int swiz)
+brw_is_single_value_swizzle(unsigned swiz)
 {
    return (swiz == BRW_SWIZZLE_XXXX ||
            swiz == BRW_SWIZZLE_YYYY ||
@@ -88,7 +95,104 @@ brw_is_single_value_swizzle(int swiz)
            swiz == BRW_SWIZZLE_WWWW);
 }
 
-enum brw_reg_type {
+/**
+ * Compute the swizzle obtained from the application of \p swz0 on the result
+ * of \p swz1.  The argument ordering is expected to match function
+ * composition.
+ */
+static inline unsigned
+brw_compose_swizzle(unsigned swz0, unsigned swz1)
+{
+   return BRW_SWIZZLE4(
+      BRW_GET_SWZ(swz1, BRW_GET_SWZ(swz0, 0)),
+      BRW_GET_SWZ(swz1, BRW_GET_SWZ(swz0, 1)),
+      BRW_GET_SWZ(swz1, BRW_GET_SWZ(swz0, 2)),
+      BRW_GET_SWZ(swz1, BRW_GET_SWZ(swz0, 3)));
+}
+
+/**
+ * Return the result of applying swizzle \p swz to shuffle the bits of \p mask
+ * (AKA image).
+ */
+static inline unsigned
+brw_apply_swizzle_to_mask(unsigned swz, unsigned mask)
+{
+   unsigned result = 0;
+
+   for (unsigned i = 0; i < 4; i++) {
+      if (mask & (1 << BRW_GET_SWZ(swz, i)))
+         result |= 1 << i;
+   }
+
+   return result;
+}
+
+/**
+ * Return the result of applying the inverse of swizzle \p swz to shuffle the
+ * bits of \p mask (AKA preimage).  Useful to find out which components are
+ * read from a swizzled source given the instruction writemask.
+ */
+static inline unsigned
+brw_apply_inv_swizzle_to_mask(unsigned swz, unsigned mask)
+{
+   unsigned result = 0;
+
+   for (unsigned i = 0; i < 4; i++) {
+      if (mask & (1 << i))
+         result |= 1 << BRW_GET_SWZ(swz, i);
+   }
+
+   return result;
+}
+
+/**
+ * Construct an identity swizzle for the set of enabled channels given by \p
+ * mask.  The result will only reference channels enabled in the provided \p
+ * mask, assuming that \p mask is non-zero.  The constructed swizzle will
+ * satisfy the property that for any instruction OP and any mask:
+ *
+ *    brw_OP(p, brw_writemask(dst, mask),
+ *           brw_swizzle(src, brw_swizzle_for_mask(mask)));
+ *
+ * will be equivalent to the same instruction without swizzle:
+ *
+ *    brw_OP(p, brw_writemask(dst, mask), src);
+ */
+static inline unsigned
+brw_swizzle_for_mask(unsigned mask)
+{
+   unsigned last = (mask ? ffs(mask) - 1 : 0);
+   unsigned swz[4];
+
+   for (unsigned i = 0; i < 4; i++)
+      last = swz[i] = (mask & (1 << i) ? i : last);
+
+   return BRW_SWIZZLE4(swz[0], swz[1], swz[2], swz[3]);
+}
+
+/**
+ * Construct an identity swizzle for the first \p n components of a vector.
+ * When only a subset of channels of a vec4 are used we don't want to
+ * reference the other channels, as that will tell optimization passes that
+ * those other channels are used.
+ */
+static inline unsigned
+brw_swizzle_for_size(unsigned n)
+{
+   return brw_swizzle_for_mask((1 << n) - 1);
+}
+
+/**
+ * Converse of brw_swizzle_for_mask().  Returns the mask of components
+ * accessed by the specified swizzle \p swz.
+ */
+static inline unsigned
+brw_mask_for_swizzle(unsigned swz)
+{
+   return brw_apply_inv_swizzle_to_mask(swz, ~0);
+}
+
+enum PACKED brw_reg_type {
    BRW_REGISTER_TYPE_UD = 0,
    BRW_REGISTER_TYPE_D,
    BRW_REGISTER_TYPE_UW,
@@ -114,8 +218,9 @@ enum brw_reg_type {
    BRW_REGISTER_TYPE_Q,
 };
 
-unsigned brw_reg_type_to_hw_type(const struct brw_context *brw,
+unsigned brw_reg_type_to_hw_type(const struct brw_device_info *devinfo,
                                  enum brw_reg_type type, unsigned file);
+const char *brw_reg_type_letters(unsigned brw_reg_type);
 
 #define REG_SIZE (8*4)
 
@@ -126,7 +231,7 @@ unsigned brw_reg_type_to_hw_type(const struct brw_context *brw,
  * or "structure of array" form:
  */
 struct brw_reg {
-   unsigned type:4;
+   enum brw_reg_type type:4;
    unsigned file:2;
    unsigned nr:8;
    unsigned subnr:5;              /* :1 in align16 */
@@ -160,10 +265,13 @@ struct brw_indirect {
 };
 
 
-static inline int
+static inline unsigned
 type_sz(unsigned type)
 {
    switch(type) {
+   case BRW_REGISTER_TYPE_UQ:
+   case BRW_REGISTER_TYPE_Q:
+      return 8;
    case BRW_REGISTER_TYPE_UD:
    case BRW_REGISTER_TYPE_D:
    case BRW_REGISTER_TYPE_F:
@@ -179,11 +287,40 @@ type_sz(unsigned type)
    }
 }
 
+static inline bool
+type_is_signed(unsigned type)
+{
+   switch(type) {
+   case BRW_REGISTER_TYPE_D:
+   case BRW_REGISTER_TYPE_W:
+   case BRW_REGISTER_TYPE_F:
+   case BRW_REGISTER_TYPE_B:
+   case BRW_REGISTER_TYPE_V:
+   case BRW_REGISTER_TYPE_VF:
+   case BRW_REGISTER_TYPE_DF:
+   case BRW_REGISTER_TYPE_HF:
+   case BRW_REGISTER_TYPE_Q:
+      return true;
+
+   case BRW_REGISTER_TYPE_UD:
+   case BRW_REGISTER_TYPE_UW:
+   case BRW_REGISTER_TYPE_UB:
+   case BRW_REGISTER_TYPE_UV:
+   case BRW_REGISTER_TYPE_UQ:
+      return false;
+
+   default:
+      unreachable("not reached");
+   }
+}
+
 /**
  * Construct a brw_reg.
  * \param file      one of the BRW_x_REGISTER_FILE values
  * \param nr        register number/index
  * \param subnr     register sub number
+ * \param negate    register negate modifier
+ * \param abs       register abs modifier
  * \param type      one of BRW_REGISTER_TYPE_x
  * \param vstride   one of BRW_VERTICAL_STRIDE_x
  * \param width     one of BRW_WIDTH_x
@@ -195,7 +332,9 @@ static inline struct brw_reg
 brw_reg(unsigned file,
         unsigned nr,
         unsigned subnr,
-        unsigned type,
+        unsigned negate,
+        unsigned abs,
+        enum brw_reg_type type,
         unsigned vstride,
         unsigned width,
         unsigned hstride,
@@ -205,17 +344,19 @@ brw_reg(unsigned file,
    struct brw_reg reg;
    if (file == BRW_GENERAL_REGISTER_FILE)
       assert(nr < BRW_MAX_GRF);
-   else if (file == BRW_MESSAGE_REGISTER_FILE)
-      assert((nr & ~(1 << 7)) < BRW_MAX_MRF);
    else if (file == BRW_ARCHITECTURE_REGISTER_FILE)
       assert(nr <= BRW_ARF_TIMESTAMP);
+   /* Asserting on the MRF register number requires to know the hardware gen
+    * (gen6 has 24 MRF registers), which we don't know here, so we assert
+    * for that in the generators and in brw_eu_emit.c
+    */
 
    reg.type = type;
    reg.file = file;
    reg.nr = nr;
    reg.subnr = subnr * type_sz(type);
-   reg.negate = 0;
-   reg.abs = 0;
+   reg.negate = negate;
+   reg.abs = abs;
    reg.vstride = vstride;
    reg.width = width;
    reg.hstride = hstride;
@@ -242,6 +383,8 @@ brw_vec16_reg(unsigned file, unsigned nr, unsigned subnr)
    return brw_reg(file,
                   nr,
                   subnr,
+                  0,
+                  0,
                   BRW_REGISTER_TYPE_F,
                   BRW_VERTICAL_STRIDE_16,
                   BRW_WIDTH_16,
@@ -257,6 +400,8 @@ brw_vec8_reg(unsigned file, unsigned nr, unsigned subnr)
    return brw_reg(file,
                   nr,
                   subnr,
+                  0,
+                  0,
                   BRW_REGISTER_TYPE_F,
                   BRW_VERTICAL_STRIDE_8,
                   BRW_WIDTH_8,
@@ -272,6 +417,8 @@ brw_vec4_reg(unsigned file, unsigned nr, unsigned subnr)
    return brw_reg(file,
                   nr,
                   subnr,
+                  0,
+                  0,
                   BRW_REGISTER_TYPE_F,
                   BRW_VERTICAL_STRIDE_4,
                   BRW_WIDTH_4,
@@ -287,6 +434,8 @@ brw_vec2_reg(unsigned file, unsigned nr, unsigned subnr)
    return brw_reg(file,
                   nr,
                   subnr,
+                  0,
+                  0,
                   BRW_REGISTER_TYPE_F,
                   BRW_VERTICAL_STRIDE_2,
                   BRW_WIDTH_2,
@@ -302,6 +451,8 @@ brw_vec1_reg(unsigned file, unsigned nr, unsigned subnr)
    return brw_reg(file,
                   nr,
                   subnr,
+                  0,
+                  0,
                   BRW_REGISTER_TYPE_F,
                   BRW_VERTICAL_STRIDE_0,
                   BRW_WIDTH_1,
@@ -325,18 +476,23 @@ brw_vecn_reg(unsigned width, unsigned file, unsigned nr, unsigned subnr)
    case 16:
       return brw_vec16_reg(file, nr, subnr);
    default:
-      assert(!"Invalid register width");
+      unreachable("Invalid register width");
    }
-   unreachable();
 }
 
 static inline struct brw_reg
-retype(struct brw_reg reg, unsigned type)
+retype(struct brw_reg reg, enum brw_reg_type type)
 {
    reg.type = type;
    return reg;
 }
 
+static inline struct brw_reg
+firsthalf(struct brw_reg reg)
+{
+   return reg;
+}
+
 static inline struct brw_reg
 sechalf(struct brw_reg reg)
 {
@@ -393,9 +549,11 @@ brw_uw1_reg(unsigned file, unsigned nr, unsigned subnr)
 }
 
 static inline struct brw_reg
-brw_imm_reg(unsigned type)
+brw_imm_reg(enum brw_reg_type type)
 {
    return brw_reg(BRW_IMMEDIATE_VALUE,
+                  0,
+                  0,
                   0,
                   0,
                   type,
@@ -479,19 +637,38 @@ brw_imm_vf(unsigned v)
    return imm;
 }
 
-#define VF_ZERO 0x0
-#define VF_ONE  0x30
-#define VF_NEG  (1<<7)
+/**
+ * Convert an integer into a "restricted" 8-bit float, used in vector
+ * immediates.  The 8-bit floating point format has a sign bit, an
+ * excess-3 3-bit exponent, and a 4-bit mantissa.  All integer values
+ * from -31 to 31 can be represented exactly.
+ */
+static inline uint8_t
+int_to_float8(int x)
+{
+   if (x == 0) {
+      return 0;
+   } else if (x < 0) {
+      return 1 << 7 | int_to_float8(-x);
+   } else {
+      const unsigned exponent = _mesa_logbase2(x);
+      const unsigned mantissa = (x - (1 << exponent)) << (4 - exponent);
+      assert(exponent <= 4);
+      return (exponent + 3) << 4 | mantissa;
+   }
+}
 
+/**
+ * Construct a floating-point packed vector immediate from its integer
+ * values. \sa int_to_float8()
+ */
 static inline struct brw_reg
-brw_imm_vf4(unsigned v0, unsigned v1, unsigned v2, unsigned v3)
+brw_imm_vf4(int v0, int v1, int v2, int v3)
 {
-   struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF);
-   imm.vstride = BRW_VERTICAL_STRIDE_0;
-   imm.width = BRW_WIDTH_4;
-   imm.hstride = BRW_HORIZONTAL_STRIDE_1;
-   imm.dw1.ud = ((v0 << 0) | (v1 << 8) | (v2 << 16) | (v3 << 24));
-   return imm;
+   return brw_imm_vf((int_to_float8(v0) << 0) |
+                     (int_to_float8(v1) << 8) |
+                     (int_to_float8(v2) << 16) |
+                     (int_to_float8(v3) << 24));
 }
 
 
@@ -529,6 +706,13 @@ brw_vec8_grf(unsigned nr, unsigned subnr)
    return brw_vec8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
 }
 
+/** Construct float[16] general-purpose register */
+static inline struct brw_reg
+brw_vec16_grf(unsigned nr, unsigned subnr)
+{
+   return brw_vec16_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
+}
+
 
 static inline struct brw_reg
 brw_uw8_grf(unsigned nr, unsigned subnr)
@@ -550,6 +734,12 @@ brw_null_reg(void)
    return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_NULL, 0);
 }
 
+static inline struct brw_reg
+brw_null_vec(unsigned width)
+{
+   return brw_vecn_reg(width, BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_NULL, 0);
+}
+
 static inline struct brw_reg
 brw_address_reg(unsigned subnr)
 {
@@ -566,6 +756,8 @@ brw_ip_reg(void)
    return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE,
                   BRW_ARF_IP,
                   0,
+                  0,
+                  0,
                   BRW_REGISTER_TYPE_UD,
                   BRW_VERTICAL_STRIDE_4, /* ? */
                   BRW_WIDTH_1,
@@ -575,18 +767,13 @@ brw_ip_reg(void)
 }
 
 static inline struct brw_reg
-brw_acc_reg(void)
-{
-   return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_ACCUMULATOR, 0);
-}
-
-static inline struct brw_reg
-brw_notification_1_reg(void)
+brw_notification_reg(void)
 {
-
    return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE,
                   BRW_ARF_NOTIFICATION_COUNT,
-                  1,
+                  0,
+                  0,
+                  0,
                   BRW_REGISTER_TYPE_UD,
                   BRW_VERTICAL_STRIDE_0,
                   BRW_WIDTH_1,
@@ -595,6 +782,12 @@ brw_notification_1_reg(void)
                   WRITEMASK_X);
 }
 
+static inline struct brw_reg
+brw_acc_reg(unsigned width)
+{
+   return brw_vecn_reg(width, BRW_ARCHITECTURE_REGISTER_FILE,
+                       BRW_ARF_ACCUMULATOR, 0);
+}
 
 static inline struct brw_reg
 brw_flag_reg(int reg, int subreg)
@@ -603,7 +796,11 @@ brw_flag_reg(int reg, int subreg)
                       BRW_ARF_FLAG + reg, subreg);
 }
 
-
+/**
+ * Return the mask register present in Gen4-5, or the related register present
+ * in Gen7.5 and later hardware referred to as "channel enable" register in
+ * the documentation.
+ */
 static inline struct brw_reg
 brw_mask_reg(unsigned subnr)
 {
@@ -613,7 +810,6 @@ brw_mask_reg(unsigned subnr)
 static inline struct brw_reg
 brw_message_reg(unsigned nr)
 {
-   assert((nr & ~(1 << 7)) < BRW_MAX_MRF);
    return brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, nr, 0);
 }
 
@@ -650,6 +846,27 @@ stride(struct brw_reg reg, unsigned vstride, unsigned width, unsigned hstride)
    return reg;
 }
 
+/**
+ * Multiply the vertical and horizontal stride of a register by the given
+ * factor \a s.
+ */
+static inline struct brw_reg
+spread(struct brw_reg reg, unsigned s)
+{
+   if (s) {
+      assert(_mesa_is_pow_two(s));
+
+      if (reg.hstride)
+         reg.hstride += cvt(s) - 1;
+
+      if (reg.vstride)
+         reg.vstride += cvt(s) - 1;
+
+      return reg;
+   } else {
+      return stride(reg, 0, 1, 0);
+   }
+}
 
 static inline struct brw_reg
 vec16(struct brw_reg reg)
@@ -706,10 +923,8 @@ brw_swizzle(struct brw_reg reg, unsigned x, unsigned y, unsigned z, unsigned w)
 {
    assert(reg.file != BRW_IMMEDIATE_VALUE);
 
-   reg.dw1.bits.swizzle = BRW_SWIZZLE4(BRW_GET_SWZ(reg.dw1.bits.swizzle, x),
-                                       BRW_GET_SWZ(reg.dw1.bits.swizzle, y),
-                                       BRW_GET_SWZ(reg.dw1.bits.swizzle, z),
-                                       BRW_GET_SWZ(reg.dw1.bits.swizzle, w));
+   reg.dw1.bits.swizzle = brw_compose_swizzle(BRW_SWIZZLE4(x, y, z, w),
+                                              reg.dw1.bits.swizzle);
    return reg;
 }
 
@@ -736,6 +951,12 @@ brw_set_writemask(struct brw_reg reg, unsigned mask)
    return reg;
 }
 
+static inline unsigned
+brw_writemask_for_size(unsigned n)
+{
+   return (1 << n) - 1;
+}
+
 static inline struct brw_reg
 negate(struct brw_reg reg)
 {
@@ -832,6 +1053,23 @@ brw_indirect(unsigned addr_subnr, int offset)
    return ptr;
 }
 
+static inline bool
+region_matches(struct brw_reg reg, enum brw_vertical_stride v,
+               enum brw_width w, enum brw_horizontal_stride h)
+{
+   return reg.vstride == v &&
+          reg.width == w &&
+          reg.hstride == h;
+}
+
+#define has_scalar_region(reg) \
+   region_matches(reg, BRW_VERTICAL_STRIDE_0, BRW_WIDTH_1, \
+                  BRW_HORIZONTAL_STRIDE_0)
+
+/* brw_packed_float.c */
+int brw_float_to_vf(float f);
+float brw_vf_to_float(unsigned char vf);
+
 #ifdef __cplusplus
 }
 #endif