intel/eu: Split brw_inst ex_desc accessors for SEND(C) vs. SENDS(C).
[mesa.git] / src / intel / compiler / brw_eu_defines.h
index fb8caa60715106c6df409c6cdebd02878c15fd62..7024f010f9fc08033fca5ca374cda4ca6d1bc217 100644 (file)
 /* Using the GNU statement expression extension */
 #define SET_FIELD(value, field)                                         \
    ({                                                                   \
-      uint32_t fieldval = (value) << field ## _SHIFT;                   \
+      uint32_t fieldval = (uint32_t)(value) << field ## _SHIFT;         \
       assert((fieldval & ~ field ## _MASK) == 0);                       \
       fieldval & field ## _MASK;                                        \
    })
 
 #define SET_BITS(value, high, low)                                      \
    ({                                                                   \
-      const uint32_t fieldval = (value) << (low);                       \
+      const uint32_t fieldval = (uint32_t)(value) << (low);             \
       assert((fieldval & ~INTEL_MASK(high, low)) == 0);                 \
       fieldval & INTEL_MASK(high, low);                                 \
    })
@@ -207,10 +207,12 @@ enum opcode {
    BRW_OPCODE_SHR =    8,
    BRW_OPCODE_SHL =    9,
    BRW_OPCODE_DIM =    10,  /**< Gen7.5 only */ /* Reused */
-   // BRW_OPCODE_SMOV =        10,  /**< Gen8+       */ /* Reused */
+   BRW_OPCODE_SMOV =   10,  /**< Gen8+       */ /* Reused */
    /* Reserved - 11 */
    BRW_OPCODE_ASR =    12,
-   /* Reserved - 13-15 */
+   /* Reserved - 13 */
+   BRW_OPCODE_ROR =    14,  /**< Gen11+ */
+   BRW_OPCODE_ROL =    15,  /**< Gen11+ */
    BRW_OPCODE_CMP =    16,
    BRW_OPCODE_CMPN =   17,
    BRW_OPCODE_CSEL =   18,  /**< Gen8+ */
@@ -223,27 +225,27 @@ enum opcode {
    BRW_OPCODE_BFI2 =   26,  /**< Gen7+ */
    /* Reserved - 27-31 */
    BRW_OPCODE_JMPI =   32,
-   // BRW_OPCODE_BRD = 33,  /**< Gen7+ */
+   BRW_OPCODE_BRD =    33,  /**< Gen7+ */
    BRW_OPCODE_IF =     34,
    BRW_OPCODE_IFF =    35,  /**< Pre-Gen6    */ /* Reused */
-   // BRW_OPCODE_BRC = 35,  /**< Gen7+       */ /* Reused */
+   BRW_OPCODE_BRC =    35,  /**< Gen7+       */ /* Reused */
    BRW_OPCODE_ELSE =   36,
    BRW_OPCODE_ENDIF =  37,
    BRW_OPCODE_DO =     38,  /**< Pre-Gen6    */ /* Reused */
-   // BRW_OPCODE_CASE =        38,  /**< Gen6 only   */ /* Reused */
+   BRW_OPCODE_CASE =   38,  /**< Gen6 only   */ /* Reused */
    BRW_OPCODE_WHILE =  39,
    BRW_OPCODE_BREAK =  40,
    BRW_OPCODE_CONTINUE = 41,
    BRW_OPCODE_HALT =   42,
-   // BRW_OPCODE_CALLA =       43,  /**< Gen7.5+     */
-   // BRW_OPCODE_MSAVE =       44,  /**< Pre-Gen6    */ /* Reused */
-   // BRW_OPCODE_CALL =        44,  /**< Gen6+       */ /* Reused */
-   // BRW_OPCODE_MREST =       45,  /**< Pre-Gen6    */ /* Reused */
-   // BRW_OPCODE_RET = 45,  /**< Gen6+       */ /* Reused */
-   // BRW_OPCODE_PUSH =        46,  /**< Pre-Gen6    */ /* Reused */
-   // BRW_OPCODE_FORK =        46,  /**< Gen6 only   */ /* Reused */
-   // BRW_OPCODE_GOTO =        46,  /**< Gen8+       */ /* Reused */
-   // BRW_OPCODE_POP = 47,  /**< Pre-Gen6    */
+   BRW_OPCODE_CALLA =  43,  /**< Gen7.5+     */
+   BRW_OPCODE_MSAVE =  44,  /**< Pre-Gen6    */ /* Reused */
+   BRW_OPCODE_CALL =   44,  /**< Gen6+       */ /* Reused */
+   BRW_OPCODE_MREST =  45,  /**< Pre-Gen6    */ /* Reused */
+   BRW_OPCODE_RET =    45,  /**< Gen6+       */ /* Reused */
+   BRW_OPCODE_PUSH =   46,  /**< Pre-Gen6    */ /* Reused */
+   BRW_OPCODE_FORK =   46,  /**< Gen6 only   */ /* Reused */
+   BRW_OPCODE_GOTO =   46,  /**< Gen8+       */ /* Reused */
+   BRW_OPCODE_POP =    47,  /**< Pre-Gen6    */
    BRW_OPCODE_WAIT =   48,
    BRW_OPCODE_SEND =   49,
    BRW_OPCODE_SENDC =  50,
@@ -280,7 +282,7 @@ enum opcode {
    BRW_OPCODE_PLN =    90,  /**< G45+ */
    BRW_OPCODE_MAD =    91,  /**< Gen6+ */
    BRW_OPCODE_LRP =    92,  /**< Gen6+ */
-   // BRW_OPCODE_MADM =        93,  /**< Gen8+ */
+   BRW_OPCODE_MADM =   93,  /**< Gen8+ */
    /* Reserved 94-124 */
    BRW_OPCODE_NENOP =  125, /**< G45 only */
    BRW_OPCODE_NOP =    126,
@@ -315,6 +317,21 @@ enum opcode {
    SHADER_OPCODE_SIN,
    SHADER_OPCODE_COS,
 
+   /**
+    * A generic "send" opcode.  The first two sources are the message
+    * descriptor and extended message descriptor respectively.  The third
+    * and optional fourth sources are the message payload
+    */
+   SHADER_OPCODE_SEND,
+
+   /**
+    * An "undefined" write which does nothing but indicates to liveness that
+    * we don't care about any values in the register which predate this
+    * instruction.  Used to prevent partial writes from causing issues with
+    * live ranges.
+    */
+   SHADER_OPCODE_UNDEF,
+
    /**
     * Texture sampling opcodes.
     *
@@ -354,6 +371,8 @@ enum opcode {
    SHADER_OPCODE_SAMPLEINFO,
    SHADER_OPCODE_SAMPLEINFO_LOGICAL,
 
+   SHADER_OPCODE_IMAGE_SIZE_LOGICAL,
+
    /**
     * Combines multiple sources of size 1 into a larger virtual GRF.
     * For example, parameters for a send-from-GRF message.  Or, updating
@@ -393,21 +412,36 @@ enum opcode {
     * Source 4: [required] Opcode-specific control immediate, same as source 2
     *                      of the matching non-LOGICAL opcode.
     */
-   SHADER_OPCODE_UNTYPED_ATOMIC,
+   VEC4_OPCODE_UNTYPED_ATOMIC,
    SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL,
-   SHADER_OPCODE_UNTYPED_SURFACE_READ,
+   SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL,
+   VEC4_OPCODE_UNTYPED_SURFACE_READ,
    SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL,
-   SHADER_OPCODE_UNTYPED_SURFACE_WRITE,
+   VEC4_OPCODE_UNTYPED_SURFACE_WRITE,
    SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL,
 
-   SHADER_OPCODE_TYPED_ATOMIC,
+   /**
+    * Untyped A64 surface access opcodes.
+    *
+    * Source 0: 64-bit address
+    * Source 1: Operational source
+    * Source 2: [required] Opcode-specific control immediate, same as source 2
+    *                      of the matching non-LOGICAL opcode.
+    */
+   SHADER_OPCODE_A64_UNTYPED_READ_LOGICAL,
+   SHADER_OPCODE_A64_UNTYPED_WRITE_LOGICAL,
+   SHADER_OPCODE_A64_BYTE_SCATTERED_READ_LOGICAL,
+   SHADER_OPCODE_A64_BYTE_SCATTERED_WRITE_LOGICAL,
+   SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL,
+   SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT64_LOGICAL,
+   SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT_LOGICAL,
+
    SHADER_OPCODE_TYPED_ATOMIC_LOGICAL,
-   SHADER_OPCODE_TYPED_SURFACE_READ,
    SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL,
-   SHADER_OPCODE_TYPED_SURFACE_WRITE,
    SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL,
 
    SHADER_OPCODE_RND_MODE,
+   SHADER_OPCODE_FLOAT_CONTROL_MODE,
 
    /**
     * Byte scattered write/read opcodes.
@@ -416,11 +450,20 @@ enum opcode {
     * opcode, but instead of taking a single payload blog they expect their
     * arguments separately as individual sources, like untyped write/read.
     */
-   SHADER_OPCODE_BYTE_SCATTERED_READ,
    SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL,
-   SHADER_OPCODE_BYTE_SCATTERED_WRITE,
    SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL,
 
+   /**
+    * Memory fence messages.
+    *
+    * Source 0: Must be register g0, used as header.
+    * Source 1: Immediate bool to indicate whether or not we need to stall
+    *           until memory transactions prior to the fence are completed.
+    * Source 2: Immediate byte indicating which memory to fence.  Zero means
+    *           global memory; GEN7_BTI_SLM means SLM (for Gen11+ only).
+    *
+    * Vec4 backend only uses Source 0.
+    */
    SHADER_OPCODE_MEMORY_FENCE,
 
    SHADER_OPCODE_GEN4_SCRATCH_READ,
@@ -514,13 +557,10 @@ enum opcode {
    FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
    FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7,
    FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN4,
-   FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7,
    FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL,
    FS_OPCODE_DISCARD_JUMP,
    FS_OPCODE_SET_SAMPLE_ID,
    FS_OPCODE_PACK_HALF_2x16_SPLIT,
-   FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X,
-   FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y,
    FS_OPCODE_PLACEHOLDER_HALT,
    FS_OPCODE_INTERPOLATE_AT_SAMPLE,
    FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET,
@@ -807,6 +847,8 @@ enum tex_logical_srcs {
    TEX_LOGICAL_SRC_LOD,
    /** dPdy if the operation takes explicit derivatives */
    TEX_LOGICAL_SRC_LOD2,
+   /** Min LOD */
+   TEX_LOGICAL_SRC_MIN_LOD,
    /** Sample index */
    TEX_LOGICAL_SRC_SAMPLE_INDEX,
    /** MCS data */
@@ -815,6 +857,10 @@ enum tex_logical_srcs {
    TEX_LOGICAL_SRC_SURFACE,
    /** Texture sampler index */
    TEX_LOGICAL_SRC_SAMPLER,
+   /** Texture surface bindless handle */
+   TEX_LOGICAL_SRC_SURFACE_HANDLE,
+   /** Texture sampler bindless handle */
+   TEX_LOGICAL_SRC_SAMPLER_HANDLE,
    /** Texel offset for gathers */
    TEX_LOGICAL_SRC_TG4_OFFSET,
    /** REQUIRED: Number of coordinate components (as UD immediate) */
@@ -825,6 +871,23 @@ enum tex_logical_srcs {
    TEX_LOGICAL_NUM_SRCS,
 };
 
+enum surface_logical_srcs {
+   /** Surface binding table index */
+   SURFACE_LOGICAL_SRC_SURFACE,
+   /** Surface bindless handle */
+   SURFACE_LOGICAL_SRC_SURFACE_HANDLE,
+   /** Surface address; could be multi-dimensional for typed opcodes */
+   SURFACE_LOGICAL_SRC_ADDRESS,
+   /** Data to be written or used in an atomic op */
+   SURFACE_LOGICAL_SRC_DATA,
+   /** Surface number of dimensions.  Affects the size of ADDRESS */
+   SURFACE_LOGICAL_SRC_IMM_DIMS,
+   /** Per-opcode immediate argument.  For atomics, this is the atomic opcode */
+   SURFACE_LOGICAL_SRC_IMM_ARG,
+
+   SURFACE_LOGICAL_NUM_SRCS
+};
+
 #ifdef __cplusplus
 /**
  * Allow brw_urb_write_flags enums to be ORed together.
@@ -1159,11 +1222,24 @@ enum brw_message_target {
 #define HSW_DATAPORT_DC_PORT1_ATOMIC_COUNTER_OP                     11
 #define HSW_DATAPORT_DC_PORT1_ATOMIC_COUNTER_OP_SIMD4X2             12
 #define HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_WRITE                   13
+#define GEN9_DATAPORT_DC_PORT1_A64_SCATTERED_READ                   0x10
+#define GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_SURFACE_READ             0x11
+#define GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_OP                0x12
+#define GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_SURFACE_WRITE            0x19
+#define GEN8_DATAPORT_DC_PORT1_A64_SCATTERED_WRITE                  0x1a
+#define GEN9_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_FLOAT_OP              0x1b
+#define GEN9_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_FLOAT_OP          0x1d
 
 /* GEN9 */
 #define GEN9_DATAPORT_RC_RENDER_TARGET_WRITE                        12
 #define GEN9_DATAPORT_RC_RENDER_TARGET_READ                         13
 
+/* A64 scattered message subtype */
+#define GEN8_A64_SCATTERED_SUBTYPE_BYTE                             0
+#define GEN8_A64_SCATTERED_SUBTYPE_DWORD                            1
+#define GEN8_A64_SCATTERED_SUBTYPE_QWORD                            2
+#define GEN8_A64_SCATTERED_SUBTYPE_HWORD                            3
+
 /* Dataport special binding table indices: */
 #define BRW_BTI_STATELESS                255
 #define GEN7_BTI_SLM                     254
@@ -1176,8 +1252,11 @@ enum brw_message_target {
  */
 #define GEN8_BTI_STATELESS_IA_COHERENT   255
 #define GEN8_BTI_STATELESS_NON_COHERENT  253
+#define GEN9_BTI_BINDLESS                252
 
-/* dataport atomic operations. */
+/* Dataport atomic operations for Untyped Atomic Integer Operation message
+ * (and others).
+ */
 #define BRW_AOP_AND                   1
 #define BRW_AOP_OR                    2
 #define BRW_AOP_XOR                   3
@@ -1194,6 +1273,11 @@ enum brw_message_target {
 #define BRW_AOP_CMPWR                 14
 #define BRW_AOP_PREDEC                15
 
+/* Dataport atomic operations for Untyped Atomic Float Operation message. */
+#define BRW_AOP_FMAX                  1
+#define BRW_AOP_FMIN                  2
+#define BRW_AOP_FCMPWR                3
+
 #define BRW_MATH_FUNCTION_INV                              1
 #define BRW_MATH_FUNCTION_LOG                              2
 #define BRW_MATH_FUNCTION_EXP                              3
@@ -1300,6 +1384,15 @@ enum PACKED brw_rnd_mode {
    BRW_RND_MODE_UNSPECIFIED,  /* Unspecified rounding mode */
 };
 
+#define BRW_CR0_FP64_DENORM_PRESERVE (1 << 6)
+#define BRW_CR0_FP32_DENORM_PRESERVE (1 << 7)
+#define BRW_CR0_FP16_DENORM_PRESERVE (1 << 10)
+
+#define BRW_CR0_FP_MODE_MASK (BRW_CR0_FP64_DENORM_PRESERVE | \
+                              BRW_CR0_FP32_DENORM_PRESERVE | \
+                              BRW_CR0_FP16_DENORM_PRESERVE | \
+                              BRW_CR0_RND_MODE_MASK)
+
 /* MDC_DS - Data Size Message Descriptor Control Field
  * Skylake PRM, Volume 2d, page 129
  *