i965/fs: Define logical framebuffer write opcode.

[mesa.git] / src / mesa / drivers / dri / i965 / brw_defines.h
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h

index 1b573448c062bec741d6b983d0d2171782f0faa1..9f8d7337047ed522df14f1aabd238a1cf253001e 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -38,6 +38,7 @@
        fieldval & field ## _MASK;                                        \
     })
  
+#define GET_BITS(data, high, low) (((data) & INTEL_MASK((high), (low))) >> (low)) /* masked bits of data, shifted down by low */
  #define GET_FIELD(word, field) (((word)  & field ## _MASK) >> field ## _SHIFT)
  
  #ifndef BRW_DEFINES_H
@@ -51,6 +52,7 @@
  # define GEN4_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL (0 << 15)
  # define GEN4_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM     (1 << 15)
  # define GEN7_3DPRIM_INDIRECT_PARAMETER_ENABLE      (1 << 10)
+# define GEN7_3DPRIM_PREDICATE_ENABLE               (1 << 8)
  /* DW1 */
  # define GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL (0 << 8)
  # define GEN7_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM     (1 << 8)
@@ -77,6 +79,13 @@
  #define _3DPRIM_LINESTRIP_CONT_BF 0x14
  #define _3DPRIM_TRIFAN_NOSTIPPLE  0x15
  
+/* We use this offset to be able to pass native primitive types in struct
+ * _mesa_prim::mode.  Native primitive types are BRW_PRIM_OFFSET +
+ * native_type, which should be different from all GL types and still fit in
+ * the 8 bits available. */
+
+#define BRW_PRIM_OFFSET           0x80
+
  #define BRW_ANISORATIO_2     0
  #define BRW_ANISORATIO_4     1
  #define BRW_ANISORATIO_6     2
@@ -264,6 +273,7 @@
  #define GEN8_SURFACE_HALIGN_8                       (2 << 14)
  #define GEN8_SURFACE_HALIGN_16                      (3 << 14)
  #define GEN8_SURFACE_TILING_NONE                    (0 << 12)
+#define GEN8_SURFACE_TILING_W                       (1 << 12)
  #define GEN8_SURFACE_TILING_X                       (2 << 12)
  #define GEN8_SURFACE_TILING_Y                       (3 << 12)
  #define BRW_SURFACE_RC_READ_WRITE      (1 << 8)
@@ -520,6 +530,12 @@
  #define GEN7_SURFACE_ARYSPC_FULL       (0 << 10)
  #define GEN7_SURFACE_ARYSPC_LOD0       (1 << 10)
  
+/* Surface state DW1 */
+#define GEN8_SURFACE_MOCS_SHIFT         24
+#define GEN8_SURFACE_MOCS_MASK          INTEL_MASK(30, 24)
+#define GEN8_SURFACE_QPITCH_SHIFT       0
+#define GEN8_SURFACE_QPITCH_MASK        INTEL_MASK(14, 0)
+
  /* Surface state DW2 */
  #define BRW_SURFACE_HEIGHT_SHIFT       19
  #define BRW_SURFACE_HEIGHT_MASK                INTEL_MASK(31, 19)
@@ -539,10 +555,15 @@
  #define BRW_SURFACE_PITCH_MASK         INTEL_MASK(19, 3)
  #define BRW_SURFACE_TILED              (1 << 1)
  #define BRW_SURFACE_TILED_Y            (1 << 0)
+#define HSW_SURFACE_IS_INTEGER_FORMAT   (1 << 18)
  
  /* Surface state DW4 */
  #define BRW_SURFACE_MIN_LOD_SHIFT      28
  #define BRW_SURFACE_MIN_LOD_MASK       INTEL_MASK(31, 28)
+#define BRW_SURFACE_MIN_ARRAY_ELEMENT_SHIFT    17
+#define BRW_SURFACE_MIN_ARRAY_ELEMENT_MASK     INTEL_MASK(27, 17)
+#define BRW_SURFACE_RENDER_TARGET_VIEW_EXTENT_SHIFT    8
+#define BRW_SURFACE_RENDER_TARGET_VIEW_EXTENT_MASK     INTEL_MASK(16, 8)
  #define BRW_SURFACE_MULTISAMPLECOUNT_1  (0 << 4)
  #define BRW_SURFACE_MULTISAMPLECOUNT_4  (2 << 4)
  #define GEN7_SURFACE_MULTISAMPLECOUNT_1         (0 << 3)
@@ -571,12 +592,33 @@
  #define GEN7_SURFACE_MOCS_SHIFT                 16
  #define GEN7_SURFACE_MOCS_MASK                  INTEL_MASK(19, 16)
  
+#define GEN9_SURFACE_TRMODE_SHIFT          18
+#define GEN9_SURFACE_TRMODE_MASK           INTEL_MASK(19, 18)
+#define GEN9_SURFACE_TRMODE_NONE           0
+#define GEN9_SURFACE_TRMODE_TILEYF         1
+#define GEN9_SURFACE_TRMODE_TILEYS         2
+
+#define GEN9_SURFACE_MIP_TAIL_START_LOD_SHIFT      8
+#define GEN9_SURFACE_MIP_TAIL_START_LOD_MASK       INTEL_MASK(11, 8)
+
  /* Surface state DW6 */
  #define GEN7_SURFACE_MCS_ENABLE                 (1 << 0)
  #define GEN7_SURFACE_MCS_PITCH_SHIFT            3
  #define GEN7_SURFACE_MCS_PITCH_MASK             INTEL_MASK(11, 3)
+#define GEN8_SURFACE_AUX_QPITCH_SHIFT           16
+#define GEN8_SURFACE_AUX_QPITCH_MASK            INTEL_MASK(30, 16)
+#define GEN8_SURFACE_AUX_PITCH_SHIFT            3
+#define GEN8_SURFACE_AUX_PITCH_MASK             INTEL_MASK(11, 3)
+#define GEN8_SURFACE_AUX_MODE_MASK              INTEL_MASK(2, 0)
+
+#define GEN8_SURFACE_AUX_MODE_NONE              0
+#define GEN8_SURFACE_AUX_MODE_MCS               1
+#define GEN8_SURFACE_AUX_MODE_APPEND            2
+#define GEN8_SURFACE_AUX_MODE_HIZ               3
  
  /* Surface state DW7 */
+#define GEN9_SURFACE_RT_COMPRESSION_SHIFT       30
+#define GEN9_SURFACE_RT_COMPRESSION_MASK        INTEL_MASK(30, 30)
  #define GEN7_SURFACE_CLEAR_COLOR_SHIFT         28
  #define GEN7_SURFACE_SCS_R_SHIFT                25
  #define GEN7_SURFACE_SCS_R_MASK                 INTEL_MASK(27, 25)
@@ -595,12 +637,69 @@
  #define HSW_SCS_BLUE                     6
  #define HSW_SCS_ALPHA                    7
  
-#define BRW_TEXCOORDMODE_WRAP            0
-#define BRW_TEXCOORDMODE_MIRROR          1
-#define BRW_TEXCOORDMODE_CLAMP           2
-#define BRW_TEXCOORDMODE_CUBE            3
-#define BRW_TEXCOORDMODE_CLAMP_BORDER    4
-#define BRW_TEXCOORDMODE_MIRROR_ONCE     5
+/* SAMPLER_STATE DW0 */
+#define BRW_SAMPLER_DISABLE                     (1u << 31) /* unsigned: shifting signed 1 into bit 31 is UB */
+#define BRW_SAMPLER_LOD_PRECLAMP_ENABLE         (1 << 28)
+#define GEN6_SAMPLER_MIN_MAG_NOT_EQUAL          (1 << 27) /* Gen6 only */
+#define BRW_SAMPLER_BASE_MIPLEVEL_MASK          INTEL_MASK(26, 22)
+#define BRW_SAMPLER_BASE_MIPLEVEL_SHIFT         22
+#define BRW_SAMPLER_MIP_FILTER_MASK             INTEL_MASK(21, 20)
+#define BRW_SAMPLER_MIP_FILTER_SHIFT            20
+#define BRW_SAMPLER_MAG_FILTER_MASK             INTEL_MASK(19, 17)
+#define BRW_SAMPLER_MAG_FILTER_SHIFT            17
+#define BRW_SAMPLER_MIN_FILTER_MASK             INTEL_MASK(16, 14)
+#define BRW_SAMPLER_MIN_FILTER_SHIFT            14
+#define GEN4_SAMPLER_LOD_BIAS_MASK              INTEL_MASK(13, 3)
+#define GEN4_SAMPLER_LOD_BIAS_SHIFT             3
+#define GEN4_SAMPLER_SHADOW_FUNCTION_MASK       INTEL_MASK(2, 0)
+#define GEN4_SAMPLER_SHADOW_FUNCTION_SHIFT      0
+
+#define GEN7_SAMPLER_LOD_BIAS_MASK              INTEL_MASK(13, 1)
+#define GEN7_SAMPLER_LOD_BIAS_SHIFT             1
+#define GEN7_SAMPLER_EWA_ANISOTROPIC_ALGORITHM  (1 << 0)
+
+/* SAMPLER_STATE DW1 */
+#define GEN4_SAMPLER_MIN_LOD_MASK               INTEL_MASK(31, 22)
+#define GEN4_SAMPLER_MIN_LOD_SHIFT              22
+#define GEN4_SAMPLER_MAX_LOD_MASK               INTEL_MASK(21, 12)
+#define GEN4_SAMPLER_MAX_LOD_SHIFT              12
+#define GEN4_SAMPLER_CUBE_CONTROL_OVERRIDE      (1 << 9)
+/* Wrap modes are in DW1 on Gen4-6 and DW3 on Gen7+ */
+#define BRW_SAMPLER_TCX_WRAP_MODE_MASK          INTEL_MASK(8, 6)
+#define BRW_SAMPLER_TCX_WRAP_MODE_SHIFT         6
+#define BRW_SAMPLER_TCY_WRAP_MODE_MASK          INTEL_MASK(5, 3)
+#define BRW_SAMPLER_TCY_WRAP_MODE_SHIFT         3
+#define BRW_SAMPLER_TCZ_WRAP_MODE_MASK          INTEL_MASK(2, 0)
+#define BRW_SAMPLER_TCZ_WRAP_MODE_SHIFT         0
+
+#define GEN7_SAMPLER_MIN_LOD_MASK               INTEL_MASK(31, 20)
+#define GEN7_SAMPLER_MIN_LOD_SHIFT              20
+#define GEN7_SAMPLER_MAX_LOD_MASK               INTEL_MASK(19, 8)
+#define GEN7_SAMPLER_MAX_LOD_SHIFT              8
+#define GEN7_SAMPLER_SHADOW_FUNCTION_MASK       INTEL_MASK(3, 1)
+#define GEN7_SAMPLER_SHADOW_FUNCTION_SHIFT      1
+#define GEN7_SAMPLER_CUBE_CONTROL_OVERRIDE      (1 << 0)
+
+/* SAMPLER_STATE DW2 - border color pointer */
+
+/* SAMPLER_STATE DW3 */
+#define BRW_SAMPLER_MAX_ANISOTROPY_MASK         INTEL_MASK(21, 19)
+#define BRW_SAMPLER_MAX_ANISOTROPY_SHIFT        19
+#define BRW_SAMPLER_ADDRESS_ROUNDING_MASK       INTEL_MASK(18, 13)
+#define BRW_SAMPLER_ADDRESS_ROUNDING_SHIFT      13
+#define GEN7_SAMPLER_NON_NORMALIZED_COORDINATES (1 << 10)
+/* Gen7+ wrap modes reuse the same BRW_SAMPLER_TC*_WRAP_MODE enums. */
+#define GEN6_SAMPLER_NON_NORMALIZED_COORDINATES (1 << 0)
+
+enum brw_wrap_mode {
+   BRW_TEXCOORDMODE_WRAP         = 0,
+   BRW_TEXCOORDMODE_MIRROR       = 1,
+   BRW_TEXCOORDMODE_CLAMP        = 2,
+   BRW_TEXCOORDMODE_CUBE         = 3,
+   BRW_TEXCOORDMODE_CLAMP_BORDER = 4,
+   BRW_TEXCOORDMODE_MIRROR_ONCE  = 5,
+   GEN8_TEXCOORDMODE_HALF_BORDER = 6,
+};
  
  #define BRW_THREAD_PRIORITY_NORMAL   0
  #define BRW_THREAD_PRIORITY_HIGH     1
@@ -638,18 +737,20 @@ enum brw_compression {
  #define GEN6_COMPRESSION_1H            0
  #define GEN6_COMPRESSION_2H            2
  
-#define BRW_CONDITIONAL_NONE  0
-#define BRW_CONDITIONAL_Z     1
-#define BRW_CONDITIONAL_NZ    2
-#define BRW_CONDITIONAL_EQ    1        /* Z */
-#define BRW_CONDITIONAL_NEQ   2        /* NZ */
-#define BRW_CONDITIONAL_G     3
-#define BRW_CONDITIONAL_GE    4
-#define BRW_CONDITIONAL_L     5
-#define BRW_CONDITIONAL_LE    6
-#define BRW_CONDITIONAL_R     7
-#define BRW_CONDITIONAL_O     8
-#define BRW_CONDITIONAL_U     9
+enum PACKED brw_conditional_mod {
+   BRW_CONDITIONAL_NONE = 0,
+   BRW_CONDITIONAL_Z    = 1,
+   BRW_CONDITIONAL_NZ   = 2,
+   BRW_CONDITIONAL_EQ   = 1,   /* same encoding as BRW_CONDITIONAL_Z */
+   BRW_CONDITIONAL_NEQ  = 2,   /* same encoding as BRW_CONDITIONAL_NZ */
+   BRW_CONDITIONAL_G    = 3,
+   BRW_CONDITIONAL_GE   = 4,
+   BRW_CONDITIONAL_L    = 5,
+   BRW_CONDITIONAL_LE   = 6,
+   BRW_CONDITIONAL_R    = 7,    /* Gen <= 5 */
+   BRW_CONDITIONAL_O    = 8,
+   BRW_CONDITIONAL_U    = 9,
+};
  
  #define BRW_DEBUG_NONE        0
  #define BRW_DEBUG_BREAKPOINT  1
@@ -659,17 +760,21 @@ enum brw_compression {
  #define BRW_DEPENDENCY_NOTCHECKED     2
  #define BRW_DEPENDENCY_DISABLE        3
  
-#define BRW_EXECUTE_1     0
-#define BRW_EXECUTE_2     1
-#define BRW_EXECUTE_4     2
-#define BRW_EXECUTE_8     3
-#define BRW_EXECUTE_16    4
-#define BRW_EXECUTE_32    5
+enum PACKED brw_execution_size {
+   BRW_EXECUTE_1  = 0,
+   BRW_EXECUTE_2  = 1,
+   BRW_EXECUTE_4  = 2,
+   BRW_EXECUTE_8  = 3,
+   BRW_EXECUTE_16 = 4,
+   BRW_EXECUTE_32 = 5,
+};
  
-#define BRW_HORIZONTAL_STRIDE_0   0
-#define BRW_HORIZONTAL_STRIDE_1   1
-#define BRW_HORIZONTAL_STRIDE_2   2
-#define BRW_HORIZONTAL_STRIDE_4   3
+enum PACKED brw_horizontal_stride {
+   BRW_HORIZONTAL_STRIDE_0 = 0,
+   BRW_HORIZONTAL_STRIDE_1 = 1,
+   BRW_HORIZONTAL_STRIDE_2 = 2,
+   BRW_HORIZONTAL_STRIDE_4 = 3,
+};
  
  #define BRW_INSTRUCTION_NORMAL    0
  #define BRW_INSTRUCTION_SATURATE  1
@@ -711,15 +816,16 @@ enum opcode {
     BRW_OPCODE_ASR =    12,
     BRW_OPCODE_CMP =    16,
     BRW_OPCODE_CMPN =   17,
-   BRW_OPCODE_F32TO16 = 19,
-   BRW_OPCODE_F16TO32 = 20,
-   BRW_OPCODE_BFREV =  23,
-   BRW_OPCODE_BFE =    24,
-   BRW_OPCODE_BFI1 =   25,
-   BRW_OPCODE_BFI2 =   26,
+   BRW_OPCODE_CSEL =   18,  /**< Gen8+ */
+   BRW_OPCODE_F32TO16 = 19,  /**< Gen7 only */
+   BRW_OPCODE_F16TO32 = 20,  /**< Gen7 only */
+   BRW_OPCODE_BFREV =  23,  /**< Gen7+ */
+   BRW_OPCODE_BFE =    24,  /**< Gen7+ */
+   BRW_OPCODE_BFI1 =   25,  /**< Gen7+ */
+   BRW_OPCODE_BFI2 =   26,  /**< Gen7+ */
     BRW_OPCODE_JMPI =   32,
     BRW_OPCODE_IF =     34,
-   BRW_OPCODE_IFF =    35,
+   BRW_OPCODE_IFF =    35,  /**< Pre-Gen6 */
     BRW_OPCODE_ELSE =   36,
     BRW_OPCODE_ENDIF =  37,
     BRW_OPCODE_DO =     38,
@@ -727,14 +833,15 @@ enum opcode {
     BRW_OPCODE_BREAK =  40,
     BRW_OPCODE_CONTINUE = 41,
     BRW_OPCODE_HALT =   42,
-   BRW_OPCODE_MSAVE =  44,
-   BRW_OPCODE_MRESTORE = 45,
-   BRW_OPCODE_PUSH =   46,
-   BRW_OPCODE_POP =    47,
+   BRW_OPCODE_MSAVE =  44,  /**< Pre-Gen6 */
+   BRW_OPCODE_MRESTORE = 45, /**< Pre-Gen6 */
+   BRW_OPCODE_PUSH =   46,  /**< Pre-Gen6 */
+   BRW_OPCODE_GOTO =   46,  /**< Gen8+    */
+   BRW_OPCODE_POP =    47,  /**< Pre-Gen6 */
     BRW_OPCODE_WAIT =   48,
     BRW_OPCODE_SEND =   49,
     BRW_OPCODE_SENDC =  50,
-   BRW_OPCODE_MATH =   56,
+   BRW_OPCODE_MATH =   56,  /**< Gen6+ */
     BRW_OPCODE_ADD =    64,
     BRW_OPCODE_MUL =    65,
     BRW_OPCODE_AVG =    66,
@@ -746,11 +853,11 @@ enum opcode {
     BRW_OPCODE_MAC =    72,
     BRW_OPCODE_MACH =   73,
     BRW_OPCODE_LZD =    74,
-   BRW_OPCODE_FBH =    75,
-   BRW_OPCODE_FBL =    76,
-   BRW_OPCODE_CBIT =   77,
-   BRW_OPCODE_ADDC =   78,
-   BRW_OPCODE_SUBB =   79,
+   BRW_OPCODE_FBH =    75,  /**< Gen7+ */
+   BRW_OPCODE_FBL =    76,  /**< Gen7+ */
+   BRW_OPCODE_CBIT =   77,  /**< Gen7+ */
+   BRW_OPCODE_ADDC =   78,  /**< Gen7+ */
+   BRW_OPCODE_SUBB =   79,  /**< Gen7+ */
     BRW_OPCODE_SAD2 =   80,
     BRW_OPCODE_SADA2 =  81,
     BRW_OPCODE_DP4 =    84,
@@ -758,16 +865,33 @@ enum opcode {
     BRW_OPCODE_DP3 =    86,
     BRW_OPCODE_DP2 =    87,
     BRW_OPCODE_LINE =   89,
-   BRW_OPCODE_PLN =    90,
-   BRW_OPCODE_MAD =    91,
-   BRW_OPCODE_LRP =    92,
+   BRW_OPCODE_PLN =    90,  /**< G45+ */
+   BRW_OPCODE_MAD =    91,  /**< Gen6+ */
+   BRW_OPCODE_LRP =    92,  /**< Gen6+ */
+   BRW_OPCODE_NENOP =  125, /**< G45 only */
     BRW_OPCODE_NOP =    126,
  
     /* These are compiler backend opcodes that get translated into other
      * instructions.
      */
     FS_OPCODE_FB_WRITE = 128,
+
+   /**
+    * Same as FS_OPCODE_FB_WRITE but expects its arguments separately as
+    * individual sources instead of as a single payload blob:
+    *
+    * Source 0: [required] Color 0.
+    * Source 1: [optional] Color 1 (for dual source blend messages).
+    * Source 2: [optional] Src0 Alpha.
+    * Source 3: [optional] Source Depth (passthrough from the thread payload).
+    * Source 4: [optional] Destination Depth (gl_FragDepth).
+    * Source 5: [optional] Sample Mask (gl_SampleMask).
+    * Source 6: [required] Number of color components (as a UD immediate).
+    */
+   FS_OPCODE_FB_WRITE_LOGICAL,
+
     FS_OPCODE_BLORP_FB_WRITE,
+   FS_OPCODE_REP_FB_WRITE,
     SHADER_OPCODE_RCP,
     SHADER_OPCODE_RSQ,
     SHADER_OPCODE_SQRT,
@@ -792,21 +916,68 @@ enum opcode {
     SHADER_OPCODE_TG4,
     SHADER_OPCODE_TG4_OFFSET,
  
+   /**
+    * Combines multiple sources of size 1 into a larger virtual GRF.
+    * For example, parameters for a send-from-GRF message.  Or, updating
+    * channels of a size 4 VGRF used to store vec4s such as texturing results.
+    *
+    * This will be lowered into MOVs from each source to consecutive reg_offsets
+    * of the destination VGRF.
+    *
+    * src[0] may be BAD_FILE.  If so, the lowering pass skips emitting the MOV,
+    * but still reserves the first channel of the destination VGRF.  This can be
+    * used to reserve space for, say, a message header set up by the generators.
+    */
+   SHADER_OPCODE_LOAD_PAYLOAD,
+
     SHADER_OPCODE_SHADER_TIME_ADD,
  
     SHADER_OPCODE_UNTYPED_ATOMIC,
     SHADER_OPCODE_UNTYPED_SURFACE_READ,
+   SHADER_OPCODE_UNTYPED_SURFACE_WRITE,
+
+   SHADER_OPCODE_TYPED_ATOMIC,
+   SHADER_OPCODE_TYPED_SURFACE_READ,
+   SHADER_OPCODE_TYPED_SURFACE_WRITE,
+
+   SHADER_OPCODE_MEMORY_FENCE,
  
     SHADER_OPCODE_GEN4_SCRATCH_READ,
     SHADER_OPCODE_GEN4_SCRATCH_WRITE,
     SHADER_OPCODE_GEN7_SCRATCH_READ,
  
-   FS_OPCODE_DDX,
-   FS_OPCODE_DDY,
-   FS_OPCODE_PIXEL_X,
-   FS_OPCODE_PIXEL_Y,
+   SHADER_OPCODE_URB_WRITE_SIMD8,
+
+   /**
+    * Return the index of an arbitrary live channel (i.e. one of the channels
+    * enabled in the current execution mask) and assign it to the first
+    * component of the destination.  Expected to be used as input for the
+    * BROADCAST pseudo-opcode.
+    */
+   SHADER_OPCODE_FIND_LIVE_CHANNEL,
+
+   /**
+    * Pick the channel from its first source register given by the index
+    * specified as second source.  Useful for variable indexing of surfaces.
+    */
+   SHADER_OPCODE_BROADCAST,
+
+   VEC4_OPCODE_MOV_BYTES,
+   VEC4_OPCODE_PACK_BYTES,
+   VEC4_OPCODE_UNPACK_UNIFORM,
+
+   FS_OPCODE_DDX_COARSE,
+   FS_OPCODE_DDX_FINE,
+   /**
+    * Compute dFdy(), dFdyCoarse(), or dFdyFine().
+    * src1 is an immediate storing the key->render_to_fbo boolean.
+    */
+   FS_OPCODE_DDY_COARSE,
+   FS_OPCODE_DDY_FINE,
     FS_OPCODE_CINTERP,
     FS_OPCODE_LINTERP,
+   FS_OPCODE_PIXEL_X,
+   FS_OPCODE_PIXEL_Y,
     FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
     FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7,
     FS_OPCODE_VARYING_PULL_CONSTANT_LOAD,
@@ -820,10 +991,15 @@ enum opcode {
     FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X,
     FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y,
     FS_OPCODE_PLACEHOLDER_HALT,
+   FS_OPCODE_INTERPOLATE_AT_CENTROID,
+   FS_OPCODE_INTERPOLATE_AT_SAMPLE,
+   FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET,
+   FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET,
  
     VS_OPCODE_URB_WRITE,
     VS_OPCODE_PULL_CONSTANT_LOAD,
     VS_OPCODE_PULL_CONSTANT_LOAD_GEN7,
+   VS_OPCODE_SET_SIMD4X2_HEADER_GEN9,
     VS_OPCODE_UNPACK_FLAGS_SIMD4X2,
  
     /**
@@ -835,6 +1011,14 @@ enum opcode {
      */
     GS_OPCODE_URB_WRITE,
  
+   /**
+    * Write geometry shader output data to the URB and request a new URB
+    * handle (gen6).
+    *
+    * This opcode doesn't do an implied move from R0 to the first MRF.
+    */
+   GS_OPCODE_URB_WRITE_ALLOCATE,
+
     /**
      * Terminate the geometry shader thread by doing an empty URB write.
      *
@@ -871,11 +1055,9 @@ enum opcode {
     GS_OPCODE_SET_VERTEX_COUNT,
  
     /**
-    * Set DWORD 2 of dst to the immediate value in src.  Used by geometry
-    * shaders to initialize DWORD 2 of R0, which needs to be 0 in order for
-    * scratch reads and writes to operate correctly.
+    * Set DWORD 2 of dst to the value in src.
      */
-   GS_OPCODE_SET_DWORD_2_IMMED,
+   GS_OPCODE_SET_DWORD_2,
  
     /**
      * Prepare the dst register for storage in the "Channel Mask" fields of a
@@ -909,6 +1091,79 @@ enum opcode {
      * - dst is the GRF for gl_InvocationID.
      */
     GS_OPCODE_GET_INSTANCE_ID,
+
+   /**
+    * Send a FF_SYNC message to allocate initial URB handles (gen6).
+    *
+    * - dst will be used as the writeback register for the FF_SYNC operation.
+    *
+    * - src0 is the number of primitives written.
+    *
+    * - src1 is the value to hold in M0.0: number of SO vertices to write
+    *   and number of SO primitives needed. Its value will be overwritten
+    *   with the SVBI values if transform feedback is enabled.
+    *
+    * Note: This opcode uses an implicit MRF register for the ff_sync message
+    * header, so the caller is expected to set inst->base_mrf and initialize
+    * that MRF register to r0. This opcode will also write to this MRF register
+    * to include the allocated URB handle so it can then be reused directly as
+    * the header in the URB write operation we are allocating the handle for.
+    */
+   GS_OPCODE_FF_SYNC,
+
+   /**
+    * Move r0.1 (which holds PrimitiveID information in gen6) to a separate
+    * register.
+    *
+    * - dst is the GRF where PrimitiveID information will be moved.
+    */
+   GS_OPCODE_SET_PRIMITIVE_ID,
+
+   /**
+    * Write transform feedback data to the SVB by sending a SVB WRITE message.
+    * Used in gen6.
+    *
+    * - dst is the MRF register containing the message header.
+    *
+    * - src0 is the register where the vertex data is going to be copied from.
+    *
+    * - src1 is the destination register when write commit occurs.
+    */
+   GS_OPCODE_SVB_WRITE,
+
+   /**
+    * Set destination index in the SVB write message payload (M0.5). Used
+    * in gen6 for transform feedback.
+    *
+    * - dst is the header to save the destination indices for SVB WRITE.
+    * - src is the register that holds the destination indices value.
+    */
+   GS_OPCODE_SVB_SET_DST_INDEX,
+
+   /**
+    * Prepare Mx.0 subregister for being used in the FF_SYNC message header.
+    * Used in gen6 for transform feedback.
+    *
+    * - dst will hold the register with the final Mx.0 value.
+    *
+    * - src0 has the number of vertices emitted in SO (NumSOVertsToWrite)
+    *
+    * - src1 has the number of needed primitives for SO (NumSOPrimsNeeded)
+    *
+    * - src2 is the value to hold in M0: number of SO vertices to write
+    *   and number of SO primitives needed.
+    */
+   GS_OPCODE_FF_SYNC_SET_PRIMITIVES,
+
+   /**
+    * Terminate the compute shader.
+    */
+   CS_OPCODE_CS_TERMINATE,
+
+   /**
+    * GLSL barrier()
+    */
+   SHADER_OPCODE_BARRIER,
  };
  
  enum brw_urb_write_flags {
@@ -982,24 +1237,28 @@ operator|(brw_urb_write_flags x, brw_urb_write_flags y)
  }
  #endif
  
-#define BRW_PREDICATE_NONE             0
-#define BRW_PREDICATE_NORMAL           1
-#define BRW_PREDICATE_ALIGN1_ANYV             2
-#define BRW_PREDICATE_ALIGN1_ALLV             3
-#define BRW_PREDICATE_ALIGN1_ANY2H            4
-#define BRW_PREDICATE_ALIGN1_ALL2H            5
-#define BRW_PREDICATE_ALIGN1_ANY4H            6
-#define BRW_PREDICATE_ALIGN1_ALL4H            7
-#define BRW_PREDICATE_ALIGN1_ANY8H            8
-#define BRW_PREDICATE_ALIGN1_ALL8H            9
-#define BRW_PREDICATE_ALIGN1_ANY16H           10
-#define BRW_PREDICATE_ALIGN1_ALL16H           11
-#define BRW_PREDICATE_ALIGN16_REPLICATE_X     2
-#define BRW_PREDICATE_ALIGN16_REPLICATE_Y     3
-#define BRW_PREDICATE_ALIGN16_REPLICATE_Z     4
-#define BRW_PREDICATE_ALIGN16_REPLICATE_W     5
-#define BRW_PREDICATE_ALIGN16_ANY4H           6
-#define BRW_PREDICATE_ALIGN16_ALL4H           7
+enum PACKED brw_predicate {
+   BRW_PREDICATE_NONE                =  0,
+   BRW_PREDICATE_NORMAL              =  1,
+   BRW_PREDICATE_ALIGN1_ANYV         =  2,
+   BRW_PREDICATE_ALIGN1_ALLV         =  3,
+   BRW_PREDICATE_ALIGN1_ANY2H        =  4,
+   BRW_PREDICATE_ALIGN1_ALL2H        =  5,
+   BRW_PREDICATE_ALIGN1_ANY4H        =  6,
+   BRW_PREDICATE_ALIGN1_ALL4H        =  7,
+   BRW_PREDICATE_ALIGN1_ANY8H        =  8,
+   BRW_PREDICATE_ALIGN1_ALL8H        =  9,
+   BRW_PREDICATE_ALIGN1_ANY16H       = 10,
+   BRW_PREDICATE_ALIGN1_ALL16H       = 11,
+   BRW_PREDICATE_ALIGN1_ANY32H       = 12,
+   BRW_PREDICATE_ALIGN1_ALL32H       = 13,
+   BRW_PREDICATE_ALIGN16_REPLICATE_X =  2, /* ALIGN16 names reuse encodings 2-7 of the ALIGN1 names */
+   BRW_PREDICATE_ALIGN16_REPLICATE_Y =  3,
+   BRW_PREDICATE_ALIGN16_REPLICATE_Z =  4,
+   BRW_PREDICATE_ALIGN16_REPLICATE_W =  5,
+   BRW_PREDICATE_ALIGN16_ANY4H       =  6,
+   BRW_PREDICATE_ALIGN16_ALL4H       =  7,
+};
  
  #define BRW_ARCHITECTURE_REGISTER_FILE    0
  #define BRW_GENERAL_REGISTER_FILE         1
@@ -1063,23 +1322,24 @@ operator|(brw_urb_write_flags x, brw_urb_write_flags y)
  #define BRW_THREAD_ATOMIC     1
  #define BRW_THREAD_SWITCH     2
  
-#define BRW_VERTICAL_STRIDE_0                 0
-#define BRW_VERTICAL_STRIDE_1                 1
-#define BRW_VERTICAL_STRIDE_2                 2
-#define BRW_VERTICAL_STRIDE_4                 3
-#define BRW_VERTICAL_STRIDE_8                 4
-#define BRW_VERTICAL_STRIDE_16                5
-#define BRW_VERTICAL_STRIDE_32                6
-#define BRW_VERTICAL_STRIDE_64                7
-#define BRW_VERTICAL_STRIDE_128               8
-#define BRW_VERTICAL_STRIDE_256               9
-#define BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL   0xF
-
-#define BRW_WIDTH_1       0
-#define BRW_WIDTH_2       1
-#define BRW_WIDTH_4       2
-#define BRW_WIDTH_8       3
-#define BRW_WIDTH_16      4
+enum PACKED brw_vertical_stride {
+   BRW_VERTICAL_STRIDE_0               = 0,
+   BRW_VERTICAL_STRIDE_1               = 1,
+   BRW_VERTICAL_STRIDE_2               = 2,
+   BRW_VERTICAL_STRIDE_4               = 3,
+   BRW_VERTICAL_STRIDE_8               = 4,
+   BRW_VERTICAL_STRIDE_16              = 5,
+   BRW_VERTICAL_STRIDE_32              = 6,
+   BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL = 0xF,
+};
+
+enum PACKED brw_width {
+   BRW_WIDTH_1  = 0,
+   BRW_WIDTH_2  = 1,
+   BRW_WIDTH_4  = 2,
+   BRW_WIDTH_8  = 3,
+   BRW_WIDTH_16 = 4,
+};
  
  #define BRW_STATELESS_BUFFER_BOUNDARY_1K      0
  #define BRW_STATELESS_BUFFER_BOUNDARY_2K      1
@@ -1177,6 +1437,11 @@ enum brw_message_target {
  #define BRW_SAMPLER_SIMD_MODE_SIMD16                    2
  #define BRW_SAMPLER_SIMD_MODE_SIMD32_64                 3
  
+/* GEN9 changes SIMD mode 0 to mean SIMD8D, but lets us get the SIMD4x2
+ * behavior by setting bit 22 of dword 2 in the message header. */
+#define GEN9_SAMPLER_SIMD_MODE_SIMD8D                   0
+#define GEN9_SAMPLER_SIMD_MODE_EXTENSION_SIMD4X2        (1 << 22)
+
  #define BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW   0
  #define BRW_DATAPORT_OWORD_BLOCK_1_OWORDHIGH  1
  #define BRW_DATAPORT_OWORD_BLOCK_2_OWORDS     2
@@ -1237,7 +1502,13 @@ enum brw_message_target {
  #define GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_UNORM_WRITE       14
  
  /* GEN7 */
-#define GEN7_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE          10
+#define GEN7_DATAPORT_RC_MEDIA_BLOCK_READ                           4
+#define GEN7_DATAPORT_RC_TYPED_SURFACE_READ                         5
+#define GEN7_DATAPORT_RC_TYPED_ATOMIC_OP                            6
+#define GEN7_DATAPORT_RC_MEMORY_FENCE                               7
+#define GEN7_DATAPORT_RC_MEDIA_BLOCK_WRITE                          10
+#define GEN7_DATAPORT_RC_RENDER_TARGET_WRITE                        12
+#define GEN7_DATAPORT_RC_TYPED_SURFACE_WRITE                        13
  #define GEN7_DATAPORT_DC_OWORD_BLOCK_READ                           0
  #define GEN7_DATAPORT_DC_UNALIGNED_OWORD_BLOCK_READ                 1
  #define GEN7_DATAPORT_DC_OWORD_DUAL_BLOCK_READ                      2
@@ -1258,6 +1529,11 @@ enum brw_message_target {
                                                                 (1 << 17))
  #define GEN7_DATAPORT_SCRATCH_NUM_REGS_SHIFT                        12
  
+#define GEN7_PIXEL_INTERPOLATOR_LOC_SHARED_OFFSET     0
+#define GEN7_PIXEL_INTERPOLATOR_LOC_SAMPLE            1
+#define GEN7_PIXEL_INTERPOLATOR_LOC_CENTROID          2
+#define GEN7_PIXEL_INTERPOLATOR_LOC_PER_SLOT_OFFSET   3
+
  /* HSW */
  #define HSW_DATAPORT_DC_PORT0_OWORD_BLOCK_READ                      0
  #define HSW_DATAPORT_DC_PORT0_UNALIGNED_OWORD_BLOCK_READ            1
@@ -1330,6 +1606,7 @@ enum brw_message_target {
  
  #define BRW_URB_OPCODE_WRITE_HWORD  0
  #define BRW_URB_OPCODE_WRITE_OWORD  1
+#define GEN8_URB_OPCODE_SIMD8_WRITE  7
  
  #define BRW_URB_SWIZZLE_NONE          0
  #define BRW_URB_SWIZZLE_INTERLEAVE    1
@@ -1348,6 +1625,14 @@ enum brw_message_target {
  #define BRW_SCRATCH_SPACE_SIZE_1M     10
  #define BRW_SCRATCH_SPACE_SIZE_2M     11
  
+#define BRW_MESSAGE_GATEWAY_SFID_OPEN_GATEWAY         0
+#define BRW_MESSAGE_GATEWAY_SFID_CLOSE_GATEWAY        1
+#define BRW_MESSAGE_GATEWAY_SFID_FORWARD_MSG          2
+#define BRW_MESSAGE_GATEWAY_SFID_GET_TIMESTAMP        3
+#define BRW_MESSAGE_GATEWAY_SFID_BARRIER_MSG          4
+#define BRW_MESSAGE_GATEWAY_SFID_UPDATE_GATEWAY_STATE 5
+#define BRW_MESSAGE_GATEWAY_SFID_MMIO_READ_WRITE      6
+
  
  #define CMD_URB_FENCE                 0x6000
  #define CMD_CS_URB_STATE              0x6001
@@ -1370,6 +1655,36 @@ enum brw_message_target {
  #define _3DSTATE_BINDING_TABLE_POINTERS_GS     0x7829 /* GEN7+ */
  #define _3DSTATE_BINDING_TABLE_POINTERS_PS     0x782A /* GEN7+ */
  
+#define _3DSTATE_BINDING_TABLE_POOL_ALLOC       0x7919 /* GEN7.5+ */
+#define BRW_HW_BINDING_TABLE_ENABLE             (1 << 11)
+#define GEN7_HW_BT_POOL_MOCS_SHIFT              7
+#define GEN7_HW_BT_POOL_MOCS_MASK               INTEL_MASK(10, 7)
+#define GEN8_HW_BT_POOL_MOCS_SHIFT              0
+#define GEN8_HW_BT_POOL_MOCS_MASK               INTEL_MASK(6, 0)
+/* Only required in HSW */
+#define HSW_BT_POOL_ALLOC_MUST_BE_ONE           (3 << 5)
+
+#define _3DSTATE_BINDING_TABLE_EDIT_VS          0x7843 /* GEN7.5 */
+#define _3DSTATE_BINDING_TABLE_EDIT_GS          0x7844 /* GEN7.5 */
+#define _3DSTATE_BINDING_TABLE_EDIT_HS          0x7845 /* GEN7.5 */
+#define _3DSTATE_BINDING_TABLE_EDIT_DS          0x7846 /* GEN7.5 */
+#define _3DSTATE_BINDING_TABLE_EDIT_PS          0x7847 /* GEN7.5 */
+#define BRW_BINDING_TABLE_INDEX_SHIFT           16
+#define BRW_BINDING_TABLE_INDEX_MASK            INTEL_MASK(23, 16)
+
+#define BRW_BINDING_TABLE_EDIT_TARGET_ALL       3
+#define BRW_BINDING_TABLE_EDIT_TARGET_CORE1     2
+#define BRW_BINDING_TABLE_EDIT_TARGET_CORE0     1
+/* In HSW, when editing binding table entries to surface state offsets,
+ * the surface state offset is a 16-bit value aligned to 32 bytes. But
+ * Surface State Pointer in dword 2 is [15:0]. Right shift surf_offset
+ * by 5 bits so it won't disturb bit 16 (which is used as the binding
+ * table index entry), otherwise it would hang the GPU.
+ */
+#define HSW_SURFACE_STATE_EDIT(value)           ((value) >> 5)
+/* Same as Haswell, but surface state offsets are now aligned to 64 bytes. */
+#define GEN8_SURFACE_STATE_EDIT(value)          ((value) >> 6)
+
  #define _3DSTATE_SAMPLER_STATE_POINTERS                0x7802 /* GEN6+ */
  # define PS_SAMPLER_STATE_CHANGE                               (1 << 12)
  # define GS_SAMPLER_STATE_CHANGE                               (1 << 9)
@@ -1452,10 +1767,14 @@ enum brw_message_target {
  # define GEN7_URB_ENTRY_SIZE_SHIFT                      16
  # define GEN7_URB_STARTING_ADDRESS_SHIFT                25
  
-/* "GS URB Entry Allocation Size" is a U9-1 field, so the maximum gs_size
+/* Gen7 "GS URB Entry Allocation Size" is a U9-1 field, so the maximum gs_size
   * is 2^9, or 512.  It's counted in multiples of 64 bytes.
   */
-#define GEN7_MAX_GS_URB_ENTRY_SIZE_BYTES               (512*64)
+#define GEN7_MAX_GS_URB_ENTRY_SIZE_BYTES                (512*64)
+/* Gen6 "GS URB Entry Allocation Size" is defined as a number of 1024-bit
+ * (128 bytes) URB rows and the maximum allowed value is 5 rows.
+ */
+#define GEN6_MAX_GS_URB_ENTRY_SIZE_BYTES                (5*128)
  
  #define _3DSTATE_PUSH_CONSTANT_ALLOC_VS         0x7912 /* GEN7+ */
  #define _3DSTATE_PUSH_CONSTANT_ALLOC_GS         0x7915 /* GEN7+ */
@@ -1466,7 +1785,7 @@ enum brw_message_target {
  # define GEN6_CC_VIEWPORT_MODIFY                       (1 << 12)
  # define GEN6_SF_VIEWPORT_MODIFY                       (1 << 11)
  # define GEN6_CLIP_VIEWPORT_MODIFY                     (1 << 10)
-# define GEN7_NUM_VIEWPORTS                            16
+# define GEN6_NUM_VIEWPORTS                            16
  
  #define _3DSTATE_VIEWPORT_STATE_POINTERS_CC    0x7823 /* GEN7+ */
  #define _3DSTATE_VIEWPORT_STATE_POINTERS_SF_CL 0x7821 /* GEN7+ */
@@ -1491,6 +1810,8 @@ enum brw_message_target {
  # define GEN6_VS_STATISTICS_ENABLE                     (1 << 10)
  # define GEN6_VS_CACHE_DISABLE                         (1 << 1)
  # define GEN6_VS_ENABLE                                        (1 << 0)
+/* Gen8+ DW7 */
+# define GEN8_VS_SIMD8_ENABLE                           (1 << 2)
  /* Gen8+ DW8 */
  # define GEN8_VS_URB_ENTRY_OUTPUT_OFFSET_SHIFT          21
  # define GEN8_VS_URB_OUTPUT_LENGTH_SHIFT                16
@@ -1518,9 +1839,9 @@ enum brw_message_target {
  # define GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_CUT         0
  # define GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID         1
  # define GEN7_GS_CONTROL_DATA_HEADER_SIZE_SHIFT                20
-# define GEN7_GS_DISPATCH_MODE_SINGLE                  (0 << 11)
-# define GEN7_GS_DISPATCH_MODE_DUAL_INSTANCE           (1 << 11)
-# define GEN7_GS_DISPATCH_MODE_DUAL_OBJECT             (2 << 11)
+# define GEN7_GS_INSTANCE_CONTROL_SHIFT                        15
+# define GEN7_GS_DISPATCH_MODE_SHIFT                    11
+# define GEN7_GS_DISPATCH_MODE_MASK                     INTEL_MASK(12, 11)
  # define GEN6_GS_STATISTICS_ENABLE                     (1 << 10)
  # define GEN6_GS_SO_STATISTICS_ENABLE                  (1 << 9)
  # define GEN6_GS_RENDERING_ENABLE                      (1 << 8)
@@ -1605,6 +1926,7 @@ enum brw_message_target {
  # define GEN6_SF_SWIZZLE_ENABLE                                (1 << 21)
  # define GEN6_SF_POINT_SPRITE_UPPERLEFT                        (0 << 20)
  # define GEN6_SF_POINT_SPRITE_LOWERLEFT                        (1 << 20)
+# define GEN9_SF_LINE_WIDTH_SHIFT                      12 /* U11.7 */
  # define GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT           11
  # define GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT           4
  /* DW2 */
@@ -1688,7 +2010,7 @@ enum brw_message_target {
  /* GEN7/DW1: */
  # define GEN7_SF_DEPTH_BUFFER_SURFACE_FORMAT_SHIFT     12
  /* GEN7/DW2: */
-# define HSW_SF_LINE_STIPPLE_ENABLE                    14
+# define HSW_SF_LINE_STIPPLE_ENABLE                    (1 << 14)
  
  # define GEN8_SF_SMOOTH_POINT_ENABLE                    (1 << 13)
  
@@ -1709,10 +2031,17 @@ enum brw_message_target {
  /* DW12: attr 0-7 wrap shortest enables */
  /* DW13: attr 8-16 wrap shortest enables */
  
+/* DW4-5: Attribute active components (gen9) */
+#define GEN9_SBE_ACTIVE_COMPONENT_NONE                 0
+#define GEN9_SBE_ACTIVE_COMPONENT_XY                   1
+#define GEN9_SBE_ACTIVE_COMPONENT_XYZ                  2
+#define GEN9_SBE_ACTIVE_COMPONENT_XYZW                 3
+
  #define _3DSTATE_SBE_SWIZ                       0x7851 /* GEN8+ */
  
  #define _3DSTATE_RASTER                         0x7850 /* GEN8+ */
  /* DW1 */
+# define GEN9_RASTER_VIEWPORT_Z_FAR_CLIP_TEST_ENABLE    (1 << 26)
  # define GEN8_RASTER_FRONT_WINDING_CCW                  (1 << 21)
  # define GEN8_RASTER_CULL_BOTH                          (0 << 16)
  # define GEN8_RASTER_CULL_NONE                          (1 << 16)
@@ -1723,6 +2052,7 @@ enum brw_message_target {
  # define GEN8_RASTER_LINE_AA_ENABLE                     (1 << 2)
  # define GEN8_RASTER_SCISSOR_ENABLE                     (1 << 1)
  # define GEN8_RASTER_VIEWPORT_Z_CLIP_TEST_ENABLE        (1 << 0)
+# define GEN9_RASTER_VIEWPORT_Z_NEAR_CLIP_TEST_ENABLE   (1 << 0)
  
  /* Gen8 BLEND_STATE */
  /* DW0 */
@@ -1833,6 +2163,18 @@ enum brw_message_target {
  # define GEN8_WM_DS_BF_STENCIL_TEST_MASK_SHIFT          8
  # define GEN8_WM_DS_BF_STENCIL_WRITE_MASK_MASK          INTEL_MASK(7, 0)
  # define GEN8_WM_DS_BF_STENCIL_WRITE_MASK_SHIFT         0
+/* DW3 */
+# define GEN9_WM_DS_STENCIL_REF_MASK                    INTEL_MASK(15, 8)
+# define GEN9_WM_DS_STENCIL_REF_SHIFT                   8
+# define GEN9_WM_DS_BF_STENCIL_REF_MASK                 INTEL_MASK(7, 0)
+# define GEN9_WM_DS_BF_STENCIL_REF_SHIFT                0
+
+enum brw_pixel_shader_computed_depth_mode { /* unshifted; cf. *_COMPUTED_DEPTH_MODE_SHIFT */
+   BRW_PSCDEPTH_OFF   = 0, /* PS does not compute depth */
+   BRW_PSCDEPTH_ON    = 1, /* PS computes depth; no guarantee about value */
+   BRW_PSCDEPTH_ON_GE = 2, /* PS guarantees output depth >= source depth */
+   BRW_PSCDEPTH_ON_LE = 3, /* PS guarantees output depth <= source depth */
+};
  
  #define _3DSTATE_PS_EXTRA                       0x784F /* GEN8+ */
  /* DW1 */
@@ -1840,10 +2182,7 @@ enum brw_message_target {
  # define GEN8_PSX_PIXEL_SHADER_NO_RT_WRITE              (1 << 30)
  # define GEN8_PSX_OMASK_TO_RENDER_TARGET                (1 << 29)
  # define GEN8_PSX_KILL_ENABLE                           (1 << 28)
-# define GEN8_PSX_PSCDEPTH_OFF                          (0 << 26)
-# define GEN8_PSX_PSCDEPTH_ON                           (1 << 26)
-# define GEN8_PSX_PSCDEPTH_ON_GE                        (2 << 26)
-# define GEN8_PSX_PSCDEPTH_ON_LE                        (3 << 26)
+# define GEN8_PSX_COMPUTED_DEPTH_MODE_SHIFT             26
  # define GEN8_PSX_FORCE_COMPUTED_DEPTH                  (1 << 25)
  # define GEN8_PSX_USES_SOURCE_DEPTH                     (1 << 24)
  # define GEN8_PSX_USES_SOURCE_W                         (1 << 23)
@@ -1851,6 +2190,7 @@ enum brw_message_target {
  # define GEN8_PSX_SHADER_DISABLES_ALPHA_TO_COVERAGE     (1 << 7)
  # define GEN8_PSX_SHADER_IS_PER_SAMPLE                  (1 << 6)
  # define GEN8_PSX_SHADER_COMPUTES_STENCIL               (1 << 5)
+# define GEN9_PSX_SHADER_PULLS_BARY                     (1 << 3)
  # define GEN8_PSX_SHADER_HAS_UAV                        (1 << 2)
  # define GEN8_PSX_SHADER_USES_INPUT_COVERAGE_MASK       (1 << 1)
  
@@ -1985,10 +2325,7 @@ enum brw_wm_barycentric_interp_mode {
  # define GEN7_WM_DEPTH_RESOLVE                         (1 << 28)
  # define GEN7_WM_HIERARCHICAL_DEPTH_RESOLVE            (1 << 27)
  # define GEN7_WM_KILL_ENABLE                           (1 << 25)
-# define GEN7_WM_PSCDEPTH_OFF                          (0 << 23)
-# define GEN7_WM_PSCDEPTH_ON                           (1 << 23)
-# define GEN7_WM_PSCDEPTH_ON_GE                                (2 << 23)
-# define GEN7_WM_PSCDEPTH_ON_LE                                (3 << 23)
+# define GEN7_WM_COMPUTED_DEPTH_MODE_SHIFT              23
  # define GEN7_WM_USES_SOURCE_DEPTH                     (1 << 20)
  # define GEN7_WM_USES_SOURCE_W                         (1 << 19)
  # define GEN7_WM_POSITION_ZW_PIXEL                     (0 << 17)
@@ -2021,6 +2358,7 @@ enum brw_wm_barycentric_interp_mode {
  # define GEN7_PS_SPF_MODE                              (1 << 31)
  # define GEN7_PS_VECTOR_MASK_ENABLE                    (1 << 30)
  # define GEN7_PS_SAMPLER_COUNT_SHIFT                   27
+# define GEN7_PS_SAMPLER_COUNT_MASK                     INTEL_MASK(29, 27)
  # define GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT       18
  # define GEN7_PS_FLOATING_POINT_MODE_IEEE_754          (0 << 16)
  # define GEN7_PS_FLOATING_POINT_MODE_ALT               (1 << 16)
@@ -2191,6 +2529,59 @@ enum brw_wm_barycentric_interp_mode {
  #define HSW_MOCS_WB_LLC_WB_ELLC         (2 << 1)
  #define HSW_MOCS_UC_LLC_WB_ELLC         (3 << 1)
  
-#include "intel_chipset.h"
+/* Broadwell: these defines always use all available caches (L3, LLC, eLLC),
+ * and let you force write-back (WB) or write-through (WT) caching, or leave
+ * it up to the page table entry (PTE) specified by the kernel.
+ */
+#define BDW_MOCS_WB  0x78
+#define BDW_MOCS_WT  0x58
+#define BDW_MOCS_PTE 0x18
+
+/* Skylake: MOCS is now an index into an array of 62 different caching
+ * configurations programmed by the kernel.
+ */
+/* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */
+#define SKL_MOCS_WB  (2 << 1)
+/* TC=LLC/eLLC, LeCC=PTE, LRUM=3, L3CC=WB */
+#define SKL_MOCS_PTE (1 << 1)
+
+#define MEDIA_VFE_STATE                         0x7000
+/* GEN7 DW2, GEN8+ DW3 */
+# define MEDIA_VFE_STATE_MAX_THREADS_SHIFT      16
+# define MEDIA_VFE_STATE_MAX_THREADS_MASK       INTEL_MASK(31, 16)
+# define MEDIA_VFE_STATE_URB_ENTRIES_SHIFT      8
+# define MEDIA_VFE_STATE_URB_ENTRIES_MASK       INTEL_MASK(15, 8)
+# define MEDIA_VFE_STATE_RESET_GTW_TIMER_SHIFT  7
+# define MEDIA_VFE_STATE_RESET_GTW_TIMER_MASK   INTEL_MASK(7, 7)
+# define MEDIA_VFE_STATE_BYPASS_GTW_SHIFT       6
+# define MEDIA_VFE_STATE_BYPASS_GTW_MASK        INTEL_MASK(6, 6)
+# define GEN7_MEDIA_VFE_STATE_GPGPU_MODE_SHIFT  2
+# define GEN7_MEDIA_VFE_STATE_GPGPU_MODE_MASK   INTEL_MASK(2, 2)
+/* GEN7 DW4, GEN8+ DW5 */
+# define MEDIA_VFE_STATE_URB_ALLOC_SHIFT        16
+# define MEDIA_VFE_STATE_URB_ALLOC_MASK         INTEL_MASK(31, 16)
+# define MEDIA_VFE_STATE_CURBE_ALLOC_SHIFT      0
+# define MEDIA_VFE_STATE_CURBE_ALLOC_MASK       INTEL_MASK(15, 0)
+
+#define MEDIA_INTERFACE_DESCRIPTOR_LOAD         0x7002
+/* GEN7 DW5, GEN8+ DW6 */
+# define MEDIA_GPGPU_THREAD_COUNT_SHIFT         0
+# define MEDIA_GPGPU_THREAD_COUNT_MASK          INTEL_MASK(7, 0)
+# define GEN8_MEDIA_GPGPU_THREAD_COUNT_SHIFT    0
+# define GEN8_MEDIA_GPGPU_THREAD_COUNT_MASK     INTEL_MASK(9, 0)
+#define MEDIA_STATE_FLUSH                       0x7004
+#define GPGPU_WALKER                            0x7105
+/* GEN8+ DW2 */
+# define GPGPU_WALKER_INDIRECT_LENGTH_SHIFT     0
+# define GPGPU_WALKER_INDIRECT_LENGTH_MASK      INTEL_MASK(15, 0)
+/* GEN7 DW2, GEN8+ DW4 */
+# define GPGPU_WALKER_SIMD_SIZE_SHIFT           30
+# define GPGPU_WALKER_SIMD_SIZE_MASK            INTEL_MASK(31, 30)
+# define GPGPU_WALKER_THREAD_DEPTH_MAX_SHIFT    16
+# define GPGPU_WALKER_THREAD_DEPTH_MAX_MASK     INTEL_MASK(21, 16)
+# define GPGPU_WALKER_THREAD_HEIGHT_MAX_SHIFT   8
+# define GPGPU_WALKER_THREAD_HEIGHT_MAX_MASK    INTEL_MASK(13, 8) /* must not overlap DEPTH_MAX (21:16) */
+# define GPGPU_WALKER_THREAD_WIDTH_MAX_SHIFT    0
+# define GPGPU_WALKER_THREAD_WIDTH_MAX_MASK     INTEL_MASK(5, 0)
  
  #endif