/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
- Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ Intel funded Tungsten Graphics to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
**********************************************************************/
/*
* Authors:
- * Keith Whitwell <keith@tungstengraphics.com>
+ * Keith Whitwell <keithw@vmware.com>
*/
/** @file brw_reg.h
#define BRW_REG_H
#include <stdbool.h>
+#include "main/imports.h"
+#include "main/compiler.h"
+#include "main/macros.h"
#include "program/prog_instruction.h"
#include "brw_defines.h"
extern "C" {
#endif
+struct brw_context;
+
/** Number of general purpose registers (VS, WM, etc) */
#define BRW_MAX_GRF 128
#define BRW_SWIZZLE_ZZZZ BRW_SWIZZLE4(2,2,2,2)
#define BRW_SWIZZLE_WWWW BRW_SWIZZLE4(3,3,3,3)
#define BRW_SWIZZLE_XYXY BRW_SWIZZLE4(0,1,0,1)
+#define BRW_SWIZZLE_YZXW BRW_SWIZZLE4(1,2,0,3)
+#define BRW_SWIZZLE_ZXYW BRW_SWIZZLE4(2,0,1,3)
#define BRW_SWIZZLE_ZWZW BRW_SWIZZLE4(2,3,2,3)
static inline bool
-brw_is_single_value_swizzle(int swiz)
+brw_is_single_value_swizzle(unsigned swiz)
{
return (swiz == BRW_SWIZZLE_XXXX ||
swiz == BRW_SWIZZLE_YYYY ||
swiz == BRW_SWIZZLE_WWWW);
}
-enum brw_reg_type {
+/**
+ * Compute the swizzle obtained from the application of \p swz0 on the result
+ * of \p swz1. The argument ordering is expected to match function
+ * composition.
+ *
+ * For each channel i in 0..3, the selector stored in \p swz0 for channel i
+ * is used as the channel index into \p swz1, and the four selectors looked
+ * up that way are repacked with BRW_SWIZZLE4().
+ *
+ * \param swz0 swizzle applied last (outer)
+ * \param swz1 swizzle applied first (inner)
+ * \return the combined swizzle
+ */
+static inline unsigned
+brw_compose_swizzle(unsigned swz0, unsigned swz1)
+{
+ return BRW_SWIZZLE4(
+ BRW_GET_SWZ(swz1, BRW_GET_SWZ(swz0, 0)),
+ BRW_GET_SWZ(swz1, BRW_GET_SWZ(swz0, 1)),
+ BRW_GET_SWZ(swz1, BRW_GET_SWZ(swz0, 2)),
+ BRW_GET_SWZ(swz1, BRW_GET_SWZ(swz0, 3)));
+}
+
+/**
+ * Return the result of applying swizzle \p swz to shuffle the bits of \p mask
+ * (AKA image).
+ *
+ * Bit i of the result (i in 0..3) is set when \p mask has the bit named by
+ * selector i of \p swz set. Only the low four bits of the result can ever
+ * be set.
+ *
+ * \param swz swizzle whose four selectors pick the mask bits to test
+ * \param mask channel mask being shuffled
+ * \return the shuffled four-bit mask
+ */
+static inline unsigned
+brw_apply_swizzle_to_mask(unsigned swz, unsigned mask)
+{
+ unsigned result = 0;
+
+ for (unsigned i = 0; i < 4; i++) {
+ if (mask & (1 << BRW_GET_SWZ(swz, i)))
+ result |= 1 << i;
+ }
+
+ return result;
+}
+
+/**
+ * Return the result of applying the inverse of swizzle \p swz to shuffle the
+ * bits of \p mask (AKA preimage). Useful to find out which components are
+ * read from a swizzled source given the instruction writemask.
+ *
+ * For every bit i (i in 0..3) set in \p mask, the bit named by selector i of
+ * \p swz is set in the result. Bits of \p mask above bit 3 are ignored.
+ *
+ * \param swz swizzle whose selectors name the result bits
+ * \param mask channel mask selecting which selectors contribute
+ * \return mask of the channels named by the contributing selectors
+ */
+static inline unsigned
+brw_apply_inv_swizzle_to_mask(unsigned swz, unsigned mask)
+{
+ unsigned result = 0;
+
+ for (unsigned i = 0; i < 4; i++) {
+ if (mask & (1 << i))
+ result |= 1 << BRW_GET_SWZ(swz, i);
+ }
+
+ return result;
+}
+
+/**
+ * Construct an identity swizzle for the set of enabled channels given by \p
+ * mask. The result will only reference channels enabled in the provided \p
+ * mask, assuming that \p mask is non-zero. The constructed swizzle will
+ * satisfy the property that for any instruction OP and any mask:
+ *
+ * brw_OP(p, brw_writemask(dst, mask),
+ * brw_swizzle(src, brw_swizzle_for_mask(mask)));
+ *
+ * will be equivalent to the same instruction without swizzle:
+ *
+ * brw_OP(p, brw_writemask(dst, mask), src);
+ *
+ * Each channel enabled in \p mask selects itself. A disabled channel repeats
+ * the selector chosen for the channel before it; disabled channels below the
+ * first enabled one use the lowest enabled channel, located with ffs(). A
+ * zero \p mask produces a swizzle that selects channel 0 in all four
+ * positions.
+ *
+ * \param mask mask of enabled channels (bits 0..3 are examined by the loop)
+ * \return the swizzle packed with BRW_SWIZZLE4()
+ */
+static inline unsigned
+brw_swizzle_for_mask(unsigned mask)
+{
+ unsigned last = (mask ? ffs(mask) - 1 : 0);
+ unsigned swz[4];
+
+ for (unsigned i = 0; i < 4; i++)
+ last = swz[i] = (mask & (1 << i) ? i : last);
+
+ return BRW_SWIZZLE4(swz[0], swz[1], swz[2], swz[3]);
+}
+
+/**
+ * Construct an identity swizzle for the first \p n components of a vector.
+ * When only a subset of channels of a vec4 are used we don't want to
+ * reference the other channels, as that will tell optimization passes that
+ * those other channels are used.
+ *
+ * Implemented as brw_swizzle_for_mask() on a mask with the low \p n bits
+ * set.
+ *
+ * \param n number of leading components to cover
+ * NOTE(review): presumably 1..4 -- confirm against callers
+ * \return the swizzle produced by brw_swizzle_for_mask()
+ */
+static inline unsigned
+brw_swizzle_for_size(unsigned n)
+{
+ return brw_swizzle_for_mask((1 << n) - 1);
+}
+
+/**
+ * Converse of brw_swizzle_for_mask(). Returns the mask of components
+ * accessed by the specified swizzle \p swz.
+ *
+ * Calls brw_apply_inv_swizzle_to_mask() with every mask bit set, so all four
+ * selectors of \p swz contribute and the result has one bit set for each
+ * distinct channel they name.
+ *
+ * \param swz swizzle to inspect
+ * \return mask of the channels referenced by \p swz
+ */
+static inline unsigned
+brw_mask_for_swizzle(unsigned swz)
+{
+ return brw_apply_inv_swizzle_to_mask(swz, ~0);
+}
+
+enum PACKED brw_reg_type {
BRW_REGISTER_TYPE_UD = 0,
BRW_REGISTER_TYPE_D,
BRW_REGISTER_TYPE_UW,
/** @} */
BRW_REGISTER_TYPE_DF, /* Gen7+ (no immediates until Gen8+) */
+
+ /* Gen8+ */
+ BRW_REGISTER_TYPE_HF,
+ BRW_REGISTER_TYPE_UQ,
+ BRW_REGISTER_TYPE_Q,
};
unsigned brw_reg_type_to_hw_type(const struct brw_context *brw,
enum brw_reg_type type, unsigned file);
+const char *brw_reg_type_letters(unsigned brw_reg_type);
#define REG_SIZE (8*4)
* or "structure of array" form:
*/
struct brw_reg {
- unsigned type:4;
+ enum brw_reg_type type:4;
unsigned file:2;
unsigned nr:8;
unsigned subnr:5; /* :1 in align16 */
type_sz(unsigned type)
{
switch(type) {
+ case BRW_REGISTER_TYPE_UQ:
+ case BRW_REGISTER_TYPE_Q:
+ return 8;
case BRW_REGISTER_TYPE_UD:
case BRW_REGISTER_TYPE_D:
case BRW_REGISTER_TYPE_F:
}
}
+static inline bool
+type_is_signed(unsigned type)
+{ /* Classify a BRW_REGISTER_TYPE_x value: true for the first group of cases
+   * below, false for the second. Note that F, VF, DF and HF are reported
+   * as signed alongside D, W, B, V and Q. */
+ switch(type) {
+ case BRW_REGISTER_TYPE_D:
+ case BRW_REGISTER_TYPE_W:
+ case BRW_REGISTER_TYPE_F:
+ case BRW_REGISTER_TYPE_B:
+ case BRW_REGISTER_TYPE_V:
+ case BRW_REGISTER_TYPE_VF:
+ case BRW_REGISTER_TYPE_DF:
+ case BRW_REGISTER_TYPE_HF:
+ case BRW_REGISTER_TYPE_Q:
+ return true;
+
+ case BRW_REGISTER_TYPE_UD:
+ case BRW_REGISTER_TYPE_UW:
+ case BRW_REGISTER_TYPE_UB:
+ case BRW_REGISTER_TYPE_UV:
+ case BRW_REGISTER_TYPE_UQ:
+ return false;
+
+ default:
+ unreachable("not reached"); /* any value not listed above is treated as impossible */
+ }
+}
+
/**
* Construct a brw_reg.
* \param file one of the BRW_x_REGISTER_FILE values
* \param nr register number/index
* \param subnr register sub number
+ * \param negate register negate modifier
+ * \param abs register abs modifier
* \param type one of BRW_REGISTER_TYPE_x
* \param vstride one of BRW_VERTICAL_STRIDE_x
* \param width one of BRW_WIDTH_x
brw_reg(unsigned file,
unsigned nr,
unsigned subnr,
- unsigned type,
+ unsigned negate,
+ unsigned abs,
+ enum brw_reg_type type,
unsigned vstride,
unsigned width,
unsigned hstride,
reg.file = file;
reg.nr = nr;
reg.subnr = subnr * type_sz(type);
- reg.negate = 0;
- reg.abs = 0;
+ reg.negate = negate;
+ reg.abs = abs;
reg.vstride = vstride;
reg.width = width;
reg.hstride = hstride;
return brw_reg(file,
nr,
subnr,
+ 0,
+ 0,
BRW_REGISTER_TYPE_F,
BRW_VERTICAL_STRIDE_16,
BRW_WIDTH_16,
return brw_reg(file,
nr,
subnr,
+ 0,
+ 0,
BRW_REGISTER_TYPE_F,
BRW_VERTICAL_STRIDE_8,
BRW_WIDTH_8,
return brw_reg(file,
nr,
subnr,
+ 0,
+ 0,
BRW_REGISTER_TYPE_F,
BRW_VERTICAL_STRIDE_4,
BRW_WIDTH_4,
return brw_reg(file,
nr,
subnr,
+ 0,
+ 0,
BRW_REGISTER_TYPE_F,
BRW_VERTICAL_STRIDE_2,
BRW_WIDTH_2,
return brw_reg(file,
nr,
subnr,
+ 0,
+ 0,
BRW_REGISTER_TYPE_F,
BRW_VERTICAL_STRIDE_0,
BRW_WIDTH_1,
case 16:
return brw_vec16_reg(file, nr, subnr);
default:
- assert(!"Invalid register width");
+ unreachable("Invalid register width");
}
- unreachable();
}
static inline struct brw_reg
-retype(struct brw_reg reg, unsigned type)
+retype(struct brw_reg reg, enum brw_reg_type type) /* \return a copy of \p reg whose type field is set to \p type */
{
reg.type = type;
return reg;
}
+static inline struct brw_reg
+firsthalf(struct brw_reg reg)
+{
+ return reg; /* returned unchanged; presumably the counterpart of sechalf() -- confirm against its body */
+}
+
static inline struct brw_reg
sechalf(struct brw_reg reg)
{
}
static inline struct brw_reg
-brw_imm_reg(unsigned type)
+brw_imm_reg(enum brw_reg_type type)
{
return brw_reg(BRW_IMMEDIATE_VALUE,
+ 0,
+ 0,
0,
0,
type,
return imm;
}
-#define VF_ZERO 0x0
-#define VF_ONE 0x30
-#define VF_NEG (1<<7)
+/**
+ * Convert an integer into a "restricted" 8-bit float, used in vector
+ * immediates. The 8-bit floating point format has a sign bit, an
+ * excess-3 3-bit exponent, and a 4-bit mantissa. All integer values
+ * from -31 to 31 can be represented exactly.
+ *
+ * \param x integer to encode; values whose magnitude needs an exponent
+ * above 4 are rejected by assert() (assuming _mesa_logbase2()
+ * returns floor(log2(x)), that is anything outside -31..31)
+ * \return the encoding: sign in bit 7, biased exponent in bits 6:4 and
+ * mantissa in bits 3:0; zero encodes as 0
+ */
+static inline uint8_t
+int_to_float8(int x)
+{
+ if (x == 0) {
+ return 0;
+ } else if (x < 0) {
+ /* Encode the magnitude and set the sign bit. */
+ return 1 << 7 | int_to_float8(-x);
+ } else {
+ const unsigned exponent = _mesa_logbase2(x);
+ /* Check the range before shifting: for exponent > 4 the unsigned
+ * expression (4 - exponent) wraps around and would be an out-of-range
+ * shift count. */
+ assert(exponent <= 4);
+ const unsigned mantissa = (x - (1 << exponent)) << (4 - exponent);
+ return (exponent + 3) << 4 | mantissa;
+ }
+}
+/**
+ * Construct a floating-point packed vector immediate from its integer
+ * values. \sa int_to_float8()
+ *
+ * Each argument is encoded with int_to_float8() and the four bytes are
+ * packed with \p v0 in bits 7:0 up to \p v3 in bits 31:24 before being
+ * handed to brw_imm_vf().
+ */
static inline struct brw_reg
-brw_imm_vf4(unsigned v0, unsigned v1, unsigned v2, unsigned v3)
+brw_imm_vf4(int v0, int v1, int v2, int v3)
{
- struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF);
- imm.vstride = BRW_VERTICAL_STRIDE_0;
- imm.width = BRW_WIDTH_4;
- imm.hstride = BRW_HORIZONTAL_STRIDE_1;
- imm.dw1.ud = ((v0 << 0) | (v1 << 8) | (v2 << 16) | (v3 << 24));
- return imm;
+ /* int_to_float8() returns uint8_t, which promotes to int. Shift as
+ * unsigned so that an encoding with bit 7 set (any negative v3) does not
+ * overflow int when moved into bits 31:24. */
+ return brw_imm_vf(((unsigned)int_to_float8(v0) << 0) |
+ ((unsigned)int_to_float8(v1) << 8) |
+ ((unsigned)int_to_float8(v2) << 16) |
+ ((unsigned)int_to_float8(v3) << 24));
}
return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_NULL, 0);
}
+static inline struct brw_reg
+brw_null_vec(unsigned width)
+{ /* Null architecture register (BRW_ARF_NULL, subnr 0); \p width is
+   * forwarded to brw_vecn_reg(), which picks the register region. */
+ return brw_vecn_reg(width, BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_NULL, 0);
+}
+
static inline struct brw_reg
brw_address_reg(unsigned subnr)
{
return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE,
BRW_ARF_IP,
0,
+ 0,
+ 0,
BRW_REGISTER_TYPE_UD,
BRW_VERTICAL_STRIDE_4, /* ? */
BRW_WIDTH_1,
}
static inline struct brw_reg
-brw_acc_reg(void)
-{
- return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_ACCUMULATOR, 0);
-}
-
-static inline struct brw_reg
-brw_notification_1_reg(void)
+brw_acc_reg(unsigned width) /* accumulator ARF (BRW_ARF_ACCUMULATOR, subnr 0); \p width is forwarded to brw_vecn_reg() */
{
-
- return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE,
- BRW_ARF_NOTIFICATION_COUNT,
- 1,
- BRW_REGISTER_TYPE_UD,
- BRW_VERTICAL_STRIDE_0,
- BRW_WIDTH_1,
- BRW_HORIZONTAL_STRIDE_0,
- BRW_SWIZZLE_XXXX,
- WRITEMASK_X);
+ return brw_vecn_reg(width, BRW_ARCHITECTURE_REGISTER_FILE,
+ BRW_ARF_ACCUMULATOR, 0);
}
-
static inline struct brw_reg
brw_flag_reg(int reg, int subreg)
{
return reg;
}
+/**
+ * Multiply the vertical and horizontal stride of a register by the given
+ * factor \a s.
+ *
+ * For a non-zero \a s (asserted to be a power of two) each stride field that
+ * is itself non-zero is increased by cvt(s) - 1; zero stride fields are left
+ * alone. NOTE(review): this presumably relies on the stride fields being
+ * logarithmic encodings, so that the addition acts as a multiplication --
+ * confirm against cvt(). When \a s is zero the register is instead given
+ * the region stride(reg, 0, 1, 0).
+ *
+ * \param reg register whose region is adjusted (passed and returned by value)
+ * \param s stride factor: zero or a power of two
+ * \return the adjusted copy of \a reg
+ */
+static inline struct brw_reg
+spread(struct brw_reg reg, unsigned s)
+{
+ if (s) {
+ assert(is_power_of_two(s));
+
+ if (reg.hstride)
+ reg.hstride += cvt(s) - 1;
+
+ if (reg.vstride)
+ reg.vstride += cvt(s) - 1;
+
+ return reg;
+ } else {
+ return stride(reg, 0, 1, 0);
+ }
+}
static inline struct brw_reg
vec16(struct brw_reg reg)
return ptr;
}
+static inline bool
+region_matches(struct brw_reg reg, enum brw_vertical_stride v,
+ enum brw_width w, enum brw_horizontal_stride h)
+{ /* True iff reg's vstride, width and hstride fields equal \p v, \p w and
+   * \p h respectively; no other field of reg is examined. */
+ return reg.vstride == v &&
+ reg.width == w &&
+ reg.hstride == h;
+}
+
+#define has_scalar_region(reg) \
+ region_matches(reg, BRW_VERTICAL_STRIDE_0, BRW_WIDTH_1, \
+ BRW_HORIZONTAL_STRIDE_0) /* true when reg has vstride 0, width 1 and hstride 0; reg is evaluated once */
+
+/* brw_packed_float.c */
+int brw_float_to_vf(float f);
+float brw_vf_to_float(unsigned char vf);
+
#ifdef __cplusplus
}
#endif