/* Using the GNU statement expression extension.
 *
 * SET_FIELD(value, field) positions `value` inside a named bit field.
 *
 *  - `field` is token-pasted, so both field##_SHIFT and field##_MASK must be
 *    defined wherever the macro is expanded.
 *  - `value` is shifted left by field##_SHIFT.  The replacement line casts
 *    it to uint32_t first, so the shift is carried out in uint32_t rather
 *    than in whatever type `value` happens to have.
 *  - The assert fires when the shifted value has a bit set outside
 *    field##_MASK, i.e. when `value` does not fit in the field.
 *  - The statement expression evaluates to the shifted value ANDed with
 *    field##_MASK.  When assert() is compiled out (NDEBUG), an oversized
 *    value is therefore silently truncated to the field.
 *
 * `value` is expanded exactly once in either version of the shift line.
 */
#define SET_FIELD(value, field) \
({ \
- uint32_t fieldval = (value) << field ## _SHIFT; \
+ uint32_t fieldval = (uint32_t)(value) << field ## _SHIFT; \
assert((fieldval & ~ field ## _MASK) == 0); \
fieldval & field ## _MASK; \
})
/*
 * SET_BITS(value, high, low) positions `value` inside a bit range given by
 * explicit bit numbers instead of a field name.
 *
 *  - `value` is shifted left by `low`.  The replacement line casts it to
 *    uint32_t first, so the shift is carried out in uint32_t.
 *  - The assert fires when the shifted value has a bit set outside
 *    INTEL_MASK(high, low).
 *  - The statement expression evaluates to the shifted value ANDed with
 *    INTEL_MASK(high, low).  With assert() compiled out (NDEBUG), an
 *    oversized value is silently truncated.
 *
 * `low` is expanded three times and `high` twice, so neither argument
 * should have side effects.  `value` is expanded once.
 *
 * NOTE(review): INTEL_MASK is defined outside this excerpt; presumably it
 * yields a mask covering bits low..high inclusive -- confirm.
 */
#define SET_BITS(value, high, low) \
({ \
- const uint32_t fieldval = (value) << (low); \
+ const uint32_t fieldval = (uint32_t)(value) << (low); \
assert((fieldval & ~INTEL_MASK(high, low)) == 0); \
fieldval & INTEL_MASK(high, low); \
})
BRW_OPCODE_SHR = 8,
BRW_OPCODE_SHL = 9,
BRW_OPCODE_DIM = 10, /**< Gen7.5 only */ /* Reused */
- // BRW_OPCODE_SMOV = 10, /**< Gen8+ */ /* Reused */
+ BRW_OPCODE_SMOV = 10, /**< Gen8+ */ /* Reused */
/* Reserved - 11 */
BRW_OPCODE_ASR = 12,
- /* Reserved - 13-15 */
+ /* Reserved - 13 */
+ BRW_OPCODE_ROR = 14, /**< Gen11+ */
+ BRW_OPCODE_ROL = 15, /**< Gen11+ */
BRW_OPCODE_CMP = 16,
BRW_OPCODE_CMPN = 17,
BRW_OPCODE_CSEL = 18, /**< Gen8+ */
BRW_OPCODE_BFI2 = 26, /**< Gen7+ */
/* Reserved - 27-31 */
BRW_OPCODE_JMPI = 32,
- // BRW_OPCODE_BRD = 33, /**< Gen7+ */
+ BRW_OPCODE_BRD = 33, /**< Gen7+ */
BRW_OPCODE_IF = 34,
BRW_OPCODE_IFF = 35, /**< Pre-Gen6 */ /* Reused */
- // BRW_OPCODE_BRC = 35, /**< Gen7+ */ /* Reused */
+ BRW_OPCODE_BRC = 35, /**< Gen7+ */ /* Reused */
BRW_OPCODE_ELSE = 36,
BRW_OPCODE_ENDIF = 37,
BRW_OPCODE_DO = 38, /**< Pre-Gen6 */ /* Reused */
- // BRW_OPCODE_CASE = 38, /**< Gen6 only */ /* Reused */
+ BRW_OPCODE_CASE = 38, /**< Gen6 only */ /* Reused */
BRW_OPCODE_WHILE = 39,
BRW_OPCODE_BREAK = 40,
BRW_OPCODE_CONTINUE = 41,
BRW_OPCODE_HALT = 42,
- // BRW_OPCODE_CALLA = 43, /**< Gen7.5+ */
- // BRW_OPCODE_MSAVE = 44, /**< Pre-Gen6 */ /* Reused */
- // BRW_OPCODE_CALL = 44, /**< Gen6+ */ /* Reused */
- // BRW_OPCODE_MREST = 45, /**< Pre-Gen6 */ /* Reused */
- // BRW_OPCODE_RET = 45, /**< Gen6+ */ /* Reused */
- // BRW_OPCODE_PUSH = 46, /**< Pre-Gen6 */ /* Reused */
- // BRW_OPCODE_FORK = 46, /**< Gen6 only */ /* Reused */
- // BRW_OPCODE_GOTO = 46, /**< Gen8+ */ /* Reused */
- // BRW_OPCODE_POP = 47, /**< Pre-Gen6 */
+ BRW_OPCODE_CALLA = 43, /**< Gen7.5+ */
+ BRW_OPCODE_MSAVE = 44, /**< Pre-Gen6 */ /* Reused */
+ BRW_OPCODE_CALL = 44, /**< Gen6+ */ /* Reused */
+ BRW_OPCODE_MREST = 45, /**< Pre-Gen6 */ /* Reused */
+ BRW_OPCODE_RET = 45, /**< Gen6+ */ /* Reused */
+ BRW_OPCODE_PUSH = 46, /**< Pre-Gen6 */ /* Reused */
+ BRW_OPCODE_FORK = 46, /**< Gen6 only */ /* Reused */
+ BRW_OPCODE_GOTO = 46, /**< Gen8+ */ /* Reused */
+ BRW_OPCODE_POP = 47, /**< Pre-Gen6 */
BRW_OPCODE_WAIT = 48,
BRW_OPCODE_SEND = 49,
BRW_OPCODE_SENDC = 50,
BRW_OPCODE_PLN = 90, /**< G45+ */
BRW_OPCODE_MAD = 91, /**< Gen6+ */
BRW_OPCODE_LRP = 92, /**< Gen6+ */
- // BRW_OPCODE_MADM = 93, /**< Gen8+ */
+ BRW_OPCODE_MADM = 93, /**< Gen8+ */
/* Reserved 94-124 */
BRW_OPCODE_NENOP = 125, /**< G45 only */
BRW_OPCODE_NOP = 126,
SHADER_OPCODE_SIN,
SHADER_OPCODE_COS,
+ /**
+ * A generic "send" opcode. The first two sources are the message
+ * descriptor and extended message descriptor respectively. The third
+ * and optional fourth sources are the message payload.
+ */
+ SHADER_OPCODE_SEND,
+
+ /**
+ * An "undefined" write which does nothing but indicates to liveness that
+ * we don't care about any values in the register which predate this
+ * instruction. Used to prevent partial writes from causing issues with
+ * live ranges.
+ */
+ SHADER_OPCODE_UNDEF,
+
/**
* Texture sampling opcodes.
*
SHADER_OPCODE_SAMPLEINFO,
SHADER_OPCODE_SAMPLEINFO_LOGICAL,
+ SHADER_OPCODE_IMAGE_SIZE_LOGICAL,
+
/**
* Combines multiple sources of size 1 into a larger virtual GRF.
* For example, parameters for a send-from-GRF message. Or, updating
* Source 4: [required] Opcode-specific control immediate, same as source 2
* of the matching non-LOGICAL opcode.
*/
- SHADER_OPCODE_UNTYPED_ATOMIC,
+ VEC4_OPCODE_UNTYPED_ATOMIC,
SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL,
- SHADER_OPCODE_UNTYPED_SURFACE_READ,
+ SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL,
+ VEC4_OPCODE_UNTYPED_SURFACE_READ,
SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL,
- SHADER_OPCODE_UNTYPED_SURFACE_WRITE,
+ VEC4_OPCODE_UNTYPED_SURFACE_WRITE,
SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL,
- SHADER_OPCODE_TYPED_ATOMIC,
+ /**
+ * Untyped A64 surface access opcodes.
+ *
+ * Source 0: 64-bit address
+ * Source 1: Operational source
+ * Source 2: [required] Opcode-specific control immediate, same as source 2
+ * of the matching non-LOGICAL opcode.
+ */
+ SHADER_OPCODE_A64_UNTYPED_READ_LOGICAL,
+ SHADER_OPCODE_A64_UNTYPED_WRITE_LOGICAL,
+ SHADER_OPCODE_A64_BYTE_SCATTERED_READ_LOGICAL,
+ SHADER_OPCODE_A64_BYTE_SCATTERED_WRITE_LOGICAL,
+ SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL,
+ SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT64_LOGICAL,
+ SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT_LOGICAL,
+
SHADER_OPCODE_TYPED_ATOMIC_LOGICAL,
- SHADER_OPCODE_TYPED_SURFACE_READ,
SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL,
- SHADER_OPCODE_TYPED_SURFACE_WRITE,
SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL,
SHADER_OPCODE_RND_MODE,
+ SHADER_OPCODE_FLOAT_CONTROL_MODE,
/**
* Byte scattered write/read opcodes.
* opcode, but instead of taking a single payload blob they expect their
* arguments separately as individual sources, like untyped write/read.
*/
- SHADER_OPCODE_BYTE_SCATTERED_READ,
SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL,
- SHADER_OPCODE_BYTE_SCATTERED_WRITE,
SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL,
+ /**
+ * Memory fence messages.
+ *
+ * Source 0: Must be register g0, used as header.
+ * Source 1: Immediate bool to indicate whether or not we need to stall
+ * until memory transactions prior to the fence are completed.
+ * Source 2: Immediate byte indicating which memory to fence. Zero means
+ * global memory; GEN7_BTI_SLM means SLM (for Gen11+ only).
+ *
+ * Vec4 backend only uses Source 0.
+ */
SHADER_OPCODE_MEMORY_FENCE,
SHADER_OPCODE_GEN4_SCRATCH_READ,
FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7,
FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN4,
- FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7,
FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL,
FS_OPCODE_DISCARD_JUMP,
FS_OPCODE_SET_SAMPLE_ID,
FS_OPCODE_PACK_HALF_2x16_SPLIT,
- FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X,
- FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y,
FS_OPCODE_PLACEHOLDER_HALT,
FS_OPCODE_INTERPOLATE_AT_SAMPLE,
FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET,
TEX_LOGICAL_SRC_LOD,
/** dPdy if the operation takes explicit derivatives */
TEX_LOGICAL_SRC_LOD2,
+ /** Min LOD */
+ TEX_LOGICAL_SRC_MIN_LOD,
/** Sample index */
TEX_LOGICAL_SRC_SAMPLE_INDEX,
/** MCS data */
TEX_LOGICAL_SRC_SURFACE,
/** Texture sampler index */
TEX_LOGICAL_SRC_SAMPLER,
+ /** Texture surface bindless handle */
+ TEX_LOGICAL_SRC_SURFACE_HANDLE,
+ /** Texture sampler bindless handle */
+ TEX_LOGICAL_SRC_SAMPLER_HANDLE,
/** Texel offset for gathers */
TEX_LOGICAL_SRC_TG4_OFFSET,
/** REQUIRED: Number of coordinate components (as UD immediate) */
TEX_LOGICAL_NUM_SRCS,
};
+enum surface_logical_srcs { /* Source slot names, implicitly numbered from 0.
+ * NOTE(review): presumably the source layout of the surface *_LOGICAL
+ * opcodes -- confirm against the code that builds those instructions. */
+ /** Surface binding table index */
+ SURFACE_LOGICAL_SRC_SURFACE,
+ /** Surface bindless handle */
+ SURFACE_LOGICAL_SRC_SURFACE_HANDLE,
+ /** Surface address; could be multi-dimensional for typed opcodes */
+ SURFACE_LOGICAL_SRC_ADDRESS,
+ /** Data to be written or used in an atomic op */
+ SURFACE_LOGICAL_SRC_DATA,
+ /** Surface number of dimensions. Affects the size of ADDRESS */
+ SURFACE_LOGICAL_SRC_IMM_DIMS,
+ /** Per-opcode immediate argument. For atomics, this is the atomic opcode */
+ SURFACE_LOGICAL_SRC_IMM_ARG,
+
+ SURFACE_LOGICAL_NUM_SRCS /**< Sentinel: equals the number of slots above (6); must stay last */
+};
+
#ifdef __cplusplus
/**
* Allow brw_urb_write_flags enums to be ORed together.
#define HSW_DATAPORT_DC_PORT1_ATOMIC_COUNTER_OP 11
#define HSW_DATAPORT_DC_PORT1_ATOMIC_COUNTER_OP_SIMD4X2 12
#define HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_WRITE 13
+#define GEN9_DATAPORT_DC_PORT1_A64_SCATTERED_READ 0x10
+#define GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_SURFACE_READ 0x11
+#define GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_OP 0x12
+#define GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_SURFACE_WRITE 0x19
+#define GEN8_DATAPORT_DC_PORT1_A64_SCATTERED_WRITE 0x1a
+#define GEN9_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_FLOAT_OP 0x1b
+#define GEN9_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_FLOAT_OP 0x1d
/* GEN9 */
#define GEN9_DATAPORT_RC_RENDER_TARGET_WRITE 12
#define GEN9_DATAPORT_RC_RENDER_TARGET_READ 13
+/* A64 scattered message subtype */
+#define GEN8_A64_SCATTERED_SUBTYPE_BYTE 0
+#define GEN8_A64_SCATTERED_SUBTYPE_DWORD 1
+#define GEN8_A64_SCATTERED_SUBTYPE_QWORD 2
+#define GEN8_A64_SCATTERED_SUBTYPE_HWORD 3
+
/* Dataport special binding table indices: */
#define BRW_BTI_STATELESS 255
#define GEN7_BTI_SLM 254
*/
#define GEN8_BTI_STATELESS_IA_COHERENT 255
#define GEN8_BTI_STATELESS_NON_COHERENT 253
+#define GEN9_BTI_BINDLESS 252
-/* dataport atomic operations. */
+/* Dataport atomic operations for Untyped Atomic Integer Operation message
+ * (and others).
+ */
#define BRW_AOP_AND 1
#define BRW_AOP_OR 2
#define BRW_AOP_XOR 3
#define BRW_AOP_CMPWR 14
#define BRW_AOP_PREDEC 15
+/* Dataport atomic operations for Untyped Atomic Float Operation message.
+ *
+ * These numeric codes coincide with integer BRW_AOP_* codes defined above
+ * (BRW_AOP_AND is also 1, BRW_AOP_OR 2, BRW_AOP_XOR 3), so a bare value
+ * does not identify the operation without knowing which message type it
+ * is used with.
+ */
+#define BRW_AOP_FMAX 1
+#define BRW_AOP_FMIN 2
+#define BRW_AOP_FCMPWR 3
+
#define BRW_MATH_FUNCTION_INV 1
#define BRW_MATH_FUNCTION_LOG 2
#define BRW_MATH_FUNCTION_EXP 3
BRW_RND_MODE_UNSPECIFIED, /* Unspecified rounding mode */
};
+#define BRW_CR0_FP64_DENORM_PRESERVE (1 << 6)
+#define BRW_CR0_FP32_DENORM_PRESERVE (1 << 7)
+#define BRW_CR0_FP16_DENORM_PRESERVE (1 << 10)
+
+#define BRW_CR0_FP_MODE_MASK (BRW_CR0_FP64_DENORM_PRESERVE | \
+ BRW_CR0_FP32_DENORM_PRESERVE | \
+ BRW_CR0_FP16_DENORM_PRESERVE | \
+ BRW_CR0_RND_MODE_MASK)
+
/* MDC_DS - Data Size Message Descriptor Control Field
* Skylake PRM, Volume 2d, page 129
*