i965: Add new SIMD8 VS prog data flag
[mesa.git] / src / mesa / drivers / dri / i965 / brw_defines.h
index bad6d40fca875f183da3560126eee3d898e9fd36..28e398d0b88bd3f1a2d03170ac77acc95b7d9434 100644 (file)
@@ -1,8 +1,8 @@
 /*
  Copyright (C) Intel Corp.  2006.  All Rights Reserved.
- Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ Intel funded Tungsten Graphics to
  develop this 3D driver.
+
  Permission is hereby granted, free of charge, to any person obtaining
  a copy of this software and associated documentation files (the
  "Software"), to deal in the Software without restriction, including
  distribute, sublicense, and/or sell copies of the Software, and to
  permit persons to whom the Software is furnished to do so, subject to
  the following conditions:
+
  The above copyright notice and this permission notice (including the
  next paragraph) shall be included in all copies or substantial
  portions of the Software.
+
  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
  **********************************************************************/
  /*
   * Authors:
-  *   Keith Whitwell <keith@tungstengraphics.com>
+  *   Keith Whitwell <keithw@vmware.com>
   */
 
 #define INTEL_MASK(high, low) (((1<<((high)-(low)+1))-1)<<(low))
-#define SET_FIELD(value, field) (((value) << field ## _SHIFT) & field ## _MASK)
+/* SET_FIELD(value, field): build the bits for the bitfield named `field`.
+ *
+ * Shifts `value` left by field##_SHIFT into a uint32_t, asserts that the
+ * shifted value has no bits set outside field##_MASK, and evaluates to the
+ * shifted value ANDed with field##_MASK.  `value` is expanded exactly once.
+ *
+ * Using the GNU statement expression extension, so the macro can declare a
+ * temporary and run assert() while still being usable as an expression.
+ */
+#define SET_FIELD(value, field)                                         \
+   ({                                                                   \
+      uint32_t fieldval = (value) << field ## _SHIFT;                   \
+      assert((fieldval & ~ field ## _MASK) == 0);                       \
+      fieldval & field ## _MASK;                                        \
+   })
+
 #define GET_FIELD(word, field) (((word)  & field ## _MASK) >> field ## _SHIFT)
 
 #ifndef BRW_DEFINES_H
 
 /* 3D state:
  */
-#define PIPE_CONTROL_NOWRITE          0x00
-#define PIPE_CONTROL_WRITEIMMEDIATE   0x01
-#define PIPE_CONTROL_WRITEDEPTH       0x02
-#define PIPE_CONTROL_WRITETIMESTAMP   0x03
-
-#define PIPE_CONTROL_GTTWRITE_PROCESS_LOCAL 0x00
-#define PIPE_CONTROL_GTTWRITE_GLOBAL        0x01
-
 #define CMD_3D_PRIM                                 0x7b00 /* 3DPRIMITIVE */
 /* DW0 */
 # define GEN4_3DPRIM_TOPOLOGY_TYPE_SHIFT            10
 # define GEN4_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL (0 << 15)
 # define GEN4_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM     (1 << 15)
+# define GEN7_3DPRIM_INDIRECT_PARAMETER_ENABLE      (1 << 10)
 /* DW1 */
 # define GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL (0 << 8)
 # define GEN7_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM     (1 << 8)
 #define _3DPRIM_LINESTRIP_CONT_BF 0x14
 #define _3DPRIM_TRIFAN_NOSTIPPLE  0x15
 
+/* We use this offset to be able to pass native primitive types in struct
+ * _mesa_prim::mode.  Native primitive types are BRW_PRIM_OFFSET +
+ * native_type, which should be different from all GL types and still fit in
+ * the 8 bits available. */
+
+#define BRW_PRIM_OFFSET           0x80
+
 #define BRW_ANISORATIO_2     0
 #define BRW_ANISORATIO_4     1
 #define BRW_ANISORATIO_6     2
 #define BRW_STENCILOP_INVERT             7
 
 /* Surface state DW0 */
+#define GEN8_SURFACE_IS_ARRAY                       (1 << 28)
+#define GEN8_SURFACE_VALIGN_4                       (1 << 16)
+#define GEN8_SURFACE_VALIGN_8                       (2 << 16)
+#define GEN8_SURFACE_VALIGN_16                      (3 << 16)
+#define GEN8_SURFACE_HALIGN_4                       (1 << 14)
+#define GEN8_SURFACE_HALIGN_8                       (2 << 14)
+#define GEN8_SURFACE_HALIGN_16                      (3 << 14)
+#define GEN8_SURFACE_TILING_NONE                    (0 << 12)
+#define GEN8_SURFACE_TILING_W                       (1 << 12)
+#define GEN8_SURFACE_TILING_X                       (2 << 12)
+#define GEN8_SURFACE_TILING_Y                       (3 << 12)
 #define BRW_SURFACE_RC_READ_WRITE      (1 << 8)
 #define BRW_SURFACE_MIPLAYOUT_SHIFT    10
 #define BRW_SURFACE_MIPMAPLAYOUT_BELOW   0
 #define GEN7_SURFACE_ARYSPC_FULL       (0 << 10)
 #define GEN7_SURFACE_ARYSPC_LOD0       (1 << 10)
 
+/* Surface state DW1 */
+#define GEN8_SURFACE_MOCS_SHIFT         24
+#define GEN8_SURFACE_MOCS_MASK          INTEL_MASK(30, 24)
+
 /* Surface state DW2 */
 #define BRW_SURFACE_HEIGHT_SHIFT       19
 #define BRW_SURFACE_HEIGHT_MASK                INTEL_MASK(31, 19)
 /* Surface state DW4 */
 #define BRW_SURFACE_MIN_LOD_SHIFT      28
 #define BRW_SURFACE_MIN_LOD_MASK       INTEL_MASK(31, 28)
+#define BRW_SURFACE_MIN_ARRAY_ELEMENT_SHIFT    17
+#define BRW_SURFACE_MIN_ARRAY_ELEMENT_MASK     INTEL_MASK(27, 17)
+#define BRW_SURFACE_RENDER_TARGET_VIEW_EXTENT_SHIFT    8
+#define BRW_SURFACE_RENDER_TARGET_VIEW_EXTENT_MASK     INTEL_MASK(16, 8)
 #define BRW_SURFACE_MULTISAMPLECOUNT_1  (0 << 4)
 #define BRW_SURFACE_MULTISAMPLECOUNT_4  (2 << 4)
 #define GEN7_SURFACE_MULTISAMPLECOUNT_1         (0 << 3)
+#define GEN8_SURFACE_MULTISAMPLECOUNT_2         (1 << 3)
 #define GEN7_SURFACE_MULTISAMPLECOUNT_4         (2 << 3)
 #define GEN7_SURFACE_MULTISAMPLECOUNT_8         (3 << 3)
+#define GEN8_SURFACE_MULTISAMPLECOUNT_16        (4 << 3)
 #define GEN7_SURFACE_MSFMT_MSS                  (0 << 6)
 #define GEN7_SURFACE_MSFMT_DEPTH_STENCIL        (1 << 6)
 #define GEN7_SURFACE_MIN_ARRAY_ELEMENT_SHIFT   18
+#define GEN7_SURFACE_MIN_ARRAY_ELEMENT_MASK     INTEL_MASK(28, 18)
 #define GEN7_SURFACE_RENDER_TARGET_VIEW_EXTENT_SHIFT   7
+#define GEN7_SURFACE_RENDER_TARGET_VIEW_EXTENT_MASK   INTEL_MASK(17, 7)
 
 /* Surface state DW5 */
 #define BRW_SURFACE_X_OFFSET_SHIFT             25
 #define BRW_SURFACE_Y_OFFSET_MASK              INTEL_MASK(23, 20)
 #define GEN7_SURFACE_MIN_LOD_SHIFT              4
 #define GEN7_SURFACE_MIN_LOD_MASK               INTEL_MASK(7, 4)
+#define GEN8_SURFACE_Y_OFFSET_SHIFT            21
+#define GEN8_SURFACE_Y_OFFSET_MASK             INTEL_MASK(23, 21)
 
 #define GEN7_SURFACE_MOCS_SHIFT                 16
 #define GEN7_SURFACE_MOCS_MASK                  INTEL_MASK(19, 16)
 #define GEN7_SURFACE_MCS_ENABLE                 (1 << 0)
 #define GEN7_SURFACE_MCS_PITCH_SHIFT            3
 #define GEN7_SURFACE_MCS_PITCH_MASK             INTEL_MASK(11, 3)
+#define GEN8_SURFACE_AUX_QPITCH_SHIFT           16
+#define GEN8_SURFACE_AUX_QPITCH_MASK            INTEL_MASK(30, 16)
+#define GEN8_SURFACE_AUX_PITCH_SHIFT            3
+#define GEN8_SURFACE_AUX_PITCH_MASK             INTEL_MASK(11, 3)
+#define GEN8_SURFACE_AUX_MODE_MASK              INTEL_MASK(2, 0)
+
+#define GEN8_SURFACE_AUX_MODE_NONE              0
+#define GEN8_SURFACE_AUX_MODE_MCS               1
+#define GEN8_SURFACE_AUX_MODE_APPEND            2
+#define GEN8_SURFACE_AUX_MODE_HIZ               3
 
 /* Surface state DW7 */
 #define GEN7_SURFACE_CLEAR_COLOR_SHIFT         28
 #define HSW_SCS_BLUE                     6
 #define HSW_SCS_ALPHA                    7
 
-#define BRW_TEXCOORDMODE_WRAP            0
-#define BRW_TEXCOORDMODE_MIRROR          1
-#define BRW_TEXCOORDMODE_CLAMP           2
-#define BRW_TEXCOORDMODE_CUBE            3
-#define BRW_TEXCOORDMODE_CLAMP_BORDER    4
-#define BRW_TEXCOORDMODE_MIRROR_ONCE     5
+/* SAMPLER_STATE DW0 */
+#define BRW_SAMPLER_DISABLE                     (1 << 31)
+#define BRW_SAMPLER_LOD_PRECLAMP_ENABLE         (1 << 28)
+#define GEN6_SAMPLER_MIN_MAG_NOT_EQUAL          (1 << 27) /* Gen6 only */
+#define BRW_SAMPLER_BASE_MIPLEVEL_MASK          INTEL_MASK(26, 22)
+#define BRW_SAMPLER_BASE_MIPLEVEL_SHIFT         22
+#define BRW_SAMPLER_MIP_FILTER_MASK             INTEL_MASK(21, 20)
+#define BRW_SAMPLER_MIP_FILTER_SHIFT            20
+#define BRW_SAMPLER_MAG_FILTER_MASK             INTEL_MASK(19, 17)
+#define BRW_SAMPLER_MAG_FILTER_SHIFT            17
+#define BRW_SAMPLER_MIN_FILTER_MASK             INTEL_MASK(16, 14)
+#define BRW_SAMPLER_MIN_FILTER_SHIFT            14
+#define GEN4_SAMPLER_LOD_BIAS_MASK              INTEL_MASK(13, 3)
+#define GEN4_SAMPLER_LOD_BIAS_SHIFT             3
+#define GEN4_SAMPLER_SHADOW_FUNCTION_MASK       INTEL_MASK(2, 0)
+#define GEN4_SAMPLER_SHADOW_FUNCTION_SHIFT      0
+
+#define GEN7_SAMPLER_LOD_BIAS_MASK              INTEL_MASK(13, 1)
+#define GEN7_SAMPLER_LOD_BIAS_SHIFT             1
+#define GEN7_SAMPLER_EWA_ANISOTROPIC_ALGORITHM  (1 << 0)
+
+/* SAMPLER_STATE DW1 */
+#define GEN4_SAMPLER_MIN_LOD_MASK               INTEL_MASK(31, 22)
+#define GEN4_SAMPLER_MIN_LOD_SHIFT              22
+#define GEN4_SAMPLER_MAX_LOD_MASK               INTEL_MASK(21, 12)
+#define GEN4_SAMPLER_MAX_LOD_SHIFT              12
+#define GEN4_SAMPLER_CUBE_CONTROL_OVERRIDE      (1 << 9)
+/* Wrap modes are in DW1 on Gen4-6 and DW3 on Gen7+ */
+#define BRW_SAMPLER_TCX_WRAP_MODE_MASK          INTEL_MASK(8, 6)
+#define BRW_SAMPLER_TCX_WRAP_MODE_SHIFT         6
+#define BRW_SAMPLER_TCY_WRAP_MODE_MASK          INTEL_MASK(5, 3)
+#define BRW_SAMPLER_TCY_WRAP_MODE_SHIFT         3
+#define BRW_SAMPLER_TCZ_WRAP_MODE_MASK          INTEL_MASK(2, 0)
+#define BRW_SAMPLER_TCZ_WRAP_MODE_SHIFT         0
+
+#define GEN7_SAMPLER_MIN_LOD_MASK               INTEL_MASK(31, 20)
+#define GEN7_SAMPLER_MIN_LOD_SHIFT              20
+#define GEN7_SAMPLER_MAX_LOD_MASK               INTEL_MASK(19, 8)
+#define GEN7_SAMPLER_MAX_LOD_SHIFT              8
+#define GEN7_SAMPLER_SHADOW_FUNCTION_MASK       INTEL_MASK(3, 1)
+#define GEN7_SAMPLER_SHADOW_FUNCTION_SHIFT      1
+#define GEN7_SAMPLER_CUBE_CONTROL_OVERRIDE      (1 << 0)
+
+/* SAMPLER_STATE DW2 - border color pointer */
+
+/* SAMPLER_STATE DW3 */
+#define BRW_SAMPLER_MAX_ANISOTROPY_MASK         INTEL_MASK(21, 19)
+#define BRW_SAMPLER_MAX_ANISOTROPY_SHIFT        19
+#define BRW_SAMPLER_ADDRESS_ROUNDING_MASK       INTEL_MASK(18, 13)
+#define BRW_SAMPLER_ADDRESS_ROUNDING_SHIFT      13
+#define GEN7_SAMPLER_NON_NORMALIZED_COORDINATES (1 << 10)
+/* Gen7+ wrap modes reuse the same BRW_SAMPLER_TC*_WRAP_MODE enums. */
+#define GEN6_SAMPLER_NON_NORMALIZED_COORDINATES (1 << 0)
+
+enum brw_wrap_mode {   /* Texture-coordinate wrap mode encodings.  Presumably
+                        * the values stored in the 3-bit
+                        * BRW_SAMPLER_TC{X,Y,Z}_WRAP_MODE fields -- confirm
+                        * against the sampler state setup code. */
+   BRW_TEXCOORDMODE_WRAP         = 0,
+   BRW_TEXCOORDMODE_MIRROR       = 1,
+   BRW_TEXCOORDMODE_CLAMP        = 2,
+   BRW_TEXCOORDMODE_CUBE         = 3,
+   BRW_TEXCOORDMODE_CLAMP_BORDER = 4,
+   BRW_TEXCOORDMODE_MIRROR_ONCE  = 5,
+   GEN8_TEXCOORDMODE_HALF_BORDER = 6,   /* GEN8_ prefix: presumably Gen8+ only */
+};
 
 #define BRW_THREAD_PRIORITY_NORMAL   0
 #define BRW_THREAD_PRIORITY_HIGH     1
@@ -622,18 +721,20 @@ enum brw_compression {
 #define GEN6_COMPRESSION_1H            0
 #define GEN6_COMPRESSION_2H            2
 
-#define BRW_CONDITIONAL_NONE  0
-#define BRW_CONDITIONAL_Z     1
-#define BRW_CONDITIONAL_NZ    2
-#define BRW_CONDITIONAL_EQ    1        /* Z */
-#define BRW_CONDITIONAL_NEQ   2        /* NZ */
-#define BRW_CONDITIONAL_G     3
-#define BRW_CONDITIONAL_GE    4
-#define BRW_CONDITIONAL_L     5
-#define BRW_CONDITIONAL_LE    6
-#define BRW_CONDITIONAL_R     7
-#define BRW_CONDITIONAL_O     8
-#define BRW_CONDITIONAL_U     9
+enum PACKED brw_conditional_mod {   /* Conditional modifier encodings.  EQ and
+                                     * NEQ are aliases: they share the values
+                                     * of Z and NZ respectively. */
+   BRW_CONDITIONAL_NONE = 0,
+   BRW_CONDITIONAL_Z    = 1,
+   BRW_CONDITIONAL_NZ   = 2,
+   BRW_CONDITIONAL_EQ   = 1,   /* Z */
+   BRW_CONDITIONAL_NEQ  = 2,   /* NZ */
+   BRW_CONDITIONAL_G    = 3,
+   BRW_CONDITIONAL_GE   = 4,
+   BRW_CONDITIONAL_L    = 5,
+   BRW_CONDITIONAL_LE   = 6,
+   BRW_CONDITIONAL_R    = 7,    /* Gen <= 5 */
+   BRW_CONDITIONAL_O    = 8,
+   BRW_CONDITIONAL_U    = 9,
+};
 
 #define BRW_DEBUG_NONE        0
 #define BRW_DEBUG_BREAKPOINT  1
@@ -643,17 +744,21 @@ enum brw_compression {
 #define BRW_DEPENDENCY_NOTCHECKED     2
 #define BRW_DEPENDENCY_DISABLE        3
 
-#define BRW_EXECUTE_1     0
-#define BRW_EXECUTE_2     1
-#define BRW_EXECUTE_4     2
-#define BRW_EXECUTE_8     3
-#define BRW_EXECUTE_16    4
-#define BRW_EXECUTE_32    5
+enum PACKED brw_execution_size {   /* Each value is log2 of the size named by
+                                    * the enumerator (EXECUTE_1 = 0 ...
+                                    * EXECUTE_32 = 5). */
+   BRW_EXECUTE_1  = 0,
+   BRW_EXECUTE_2  = 1,
+   BRW_EXECUTE_4  = 2,
+   BRW_EXECUTE_8  = 3,
+   BRW_EXECUTE_16 = 4,
+   BRW_EXECUTE_32 = 5,
+};
 
-#define BRW_HORIZONTAL_STRIDE_0   0
-#define BRW_HORIZONTAL_STRIDE_1   1
-#define BRW_HORIZONTAL_STRIDE_2   2
-#define BRW_HORIZONTAL_STRIDE_4   3
+enum PACKED brw_horizontal_stride {   /* Encoded, not literal, strides: 0 means
+                                       * stride 0; otherwise the value is
+                                       * log2(stride) + 1. */
+   BRW_HORIZONTAL_STRIDE_0 = 0,
+   BRW_HORIZONTAL_STRIDE_1 = 1,
+   BRW_HORIZONTAL_STRIDE_2 = 2,
+   BRW_HORIZONTAL_STRIDE_4 = 3,
+};
 
 #define BRW_INSTRUCTION_NORMAL    0
 #define BRW_INSTRUCTION_SATURATE  1
@@ -695,15 +800,16 @@ enum opcode {
    BRW_OPCODE_ASR =    12,
    BRW_OPCODE_CMP =    16,
    BRW_OPCODE_CMPN =   17,
-   BRW_OPCODE_F32TO16 = 19,
-   BRW_OPCODE_F16TO32 = 20,
-   BRW_OPCODE_BFREV =  23,
-   BRW_OPCODE_BFE =    24,
-   BRW_OPCODE_BFI1 =   25,
-   BRW_OPCODE_BFI2 =   26,
+   BRW_OPCODE_CSEL =   18,  /**< Gen8+ */
+   BRW_OPCODE_F32TO16 = 19,  /**< Gen7 only */
+   BRW_OPCODE_F16TO32 = 20,  /**< Gen7 only */
+   BRW_OPCODE_BFREV =  23,  /**< Gen7+ */
+   BRW_OPCODE_BFE =    24,  /**< Gen7+ */
+   BRW_OPCODE_BFI1 =   25,  /**< Gen7+ */
+   BRW_OPCODE_BFI2 =   26,  /**< Gen7+ */
    BRW_OPCODE_JMPI =   32,
    BRW_OPCODE_IF =     34,
-   BRW_OPCODE_IFF =    35,
+   BRW_OPCODE_IFF =    35,  /**< Pre-Gen6 */
    BRW_OPCODE_ELSE =   36,
    BRW_OPCODE_ENDIF =  37,
    BRW_OPCODE_DO =     38,
@@ -711,14 +817,15 @@ enum opcode {
    BRW_OPCODE_BREAK =  40,
    BRW_OPCODE_CONTINUE = 41,
    BRW_OPCODE_HALT =   42,
-   BRW_OPCODE_MSAVE =  44,
-   BRW_OPCODE_MRESTORE = 45,
-   BRW_OPCODE_PUSH =   46,
-   BRW_OPCODE_POP =    47,
+   BRW_OPCODE_MSAVE =  44,  /**< Pre-Gen6 */
+   BRW_OPCODE_MRESTORE = 45, /**< Pre-Gen6 */
+   BRW_OPCODE_PUSH =   46,  /**< Pre-Gen6 */
+   BRW_OPCODE_GOTO =   46,  /**< Gen8+    */
+   BRW_OPCODE_POP =    47,  /**< Pre-Gen6 */
    BRW_OPCODE_WAIT =   48,
    BRW_OPCODE_SEND =   49,
    BRW_OPCODE_SENDC =  50,
-   BRW_OPCODE_MATH =   56,
+   BRW_OPCODE_MATH =   56,  /**< Gen6+ */
    BRW_OPCODE_ADD =    64,
    BRW_OPCODE_MUL =    65,
    BRW_OPCODE_AVG =    66,
@@ -730,11 +837,11 @@ enum opcode {
    BRW_OPCODE_MAC =    72,
    BRW_OPCODE_MACH =   73,
    BRW_OPCODE_LZD =    74,
-   BRW_OPCODE_FBH =    75,
-   BRW_OPCODE_FBL =    76,
-   BRW_OPCODE_CBIT =   77,
-   BRW_OPCODE_ADDC =   78,
-   BRW_OPCODE_SUBB =   79,
+   BRW_OPCODE_FBH =    75,  /**< Gen7+ */
+   BRW_OPCODE_FBL =    76,  /**< Gen7+ */
+   BRW_OPCODE_CBIT =   77,  /**< Gen7+ */
+   BRW_OPCODE_ADDC =   78,  /**< Gen7+ */
+   BRW_OPCODE_SUBB =   79,  /**< Gen7+ */
    BRW_OPCODE_SAD2 =   80,
    BRW_OPCODE_SADA2 =  81,
    BRW_OPCODE_DP4 =    84,
@@ -742,15 +849,18 @@ enum opcode {
    BRW_OPCODE_DP3 =    86,
    BRW_OPCODE_DP2 =    87,
    BRW_OPCODE_LINE =   89,
-   BRW_OPCODE_PLN =    90,
-   BRW_OPCODE_MAD =    91,
-   BRW_OPCODE_LRP =    92,
+   BRW_OPCODE_PLN =    90,  /**< G45+ */
+   BRW_OPCODE_MAD =    91,  /**< Gen6+ */
+   BRW_OPCODE_LRP =    92,  /**< Gen6+ */
+   BRW_OPCODE_NENOP =  125, /**< G45 only */
    BRW_OPCODE_NOP =    126,
 
    /* These are compiler backend opcodes that get translated into other
     * instructions.
     */
    FS_OPCODE_FB_WRITE = 128,
+   FS_OPCODE_BLORP_FB_WRITE,
+   FS_OPCODE_REP_FB_WRITE,
    SHADER_OPCODE_RCP,
    SHADER_OPCODE_RSQ,
    SHADER_OPCODE_SQRT,
@@ -768,11 +878,27 @@ enum opcode {
    SHADER_OPCODE_TXL,
    SHADER_OPCODE_TXS,
    FS_OPCODE_TXB,
-   SHADER_OPCODE_TXF_MS,
+   SHADER_OPCODE_TXF_CMS,
+   SHADER_OPCODE_TXF_UMS,
+   SHADER_OPCODE_TXF_MCS,
    SHADER_OPCODE_LOD,
    SHADER_OPCODE_TG4,
    SHADER_OPCODE_TG4_OFFSET,
 
+   /**
+    * Combines multiple sources of size 1 into a larger virtual GRF.
+    * For example, parameters for a send-from-GRF message.  Or, updating
+    * channels of a size 4 VGRF used to store vec4s such as texturing results.
+    *
+    * This will be lowered into MOVs from each source to consecutive reg_offsets
+    * of the destination VGRF.
+    *
+    * src[0] may be BAD_FILE.  If so, the lowering pass skips emitting the MOV,
+    * but still reserves the first channel of the destination VGRF.  This can be
+    * used to reserve space for, say, a message header set up by the generators.
+    */
+   SHADER_OPCODE_LOAD_PAYLOAD,
+
    SHADER_OPCODE_SHADER_TIME_ADD,
 
    SHADER_OPCODE_UNTYPED_ATOMIC,
@@ -782,8 +908,19 @@ enum opcode {
    SHADER_OPCODE_GEN4_SCRATCH_WRITE,
    SHADER_OPCODE_GEN7_SCRATCH_READ,
 
-   FS_OPCODE_DDX,
-   FS_OPCODE_DDY,
+   SHADER_OPCODE_URB_WRITE_SIMD8,
+
+   VEC4_OPCODE_PACK_BYTES,
+   VEC4_OPCODE_UNPACK_UNIFORM,
+
+   FS_OPCODE_DDX_COARSE,
+   FS_OPCODE_DDX_FINE,
+   /**
+    * Compute dFdy(), dFdyCoarse(), or dFdyFine().
+    * src1 is an immediate storing the key->render_to_fbo boolean.
+    */
+   FS_OPCODE_DDY_COARSE,
+   FS_OPCODE_DDY_FINE,
    FS_OPCODE_PIXEL_X,
    FS_OPCODE_PIXEL_Y,
    FS_OPCODE_CINTERP,
@@ -794,11 +931,17 @@ enum opcode {
    FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7,
    FS_OPCODE_MOV_DISPATCH_TO_FLAGS,
    FS_OPCODE_DISCARD_JUMP,
+   FS_OPCODE_SET_OMASK,
+   FS_OPCODE_SET_SAMPLE_ID,
    FS_OPCODE_SET_SIMD4X2_OFFSET,
    FS_OPCODE_PACK_HALF_2x16_SPLIT,
    FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X,
    FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y,
    FS_OPCODE_PLACEHOLDER_HALT,
+   FS_OPCODE_INTERPOLATE_AT_CENTROID,
+   FS_OPCODE_INTERPOLATE_AT_SAMPLE,
+   FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET,
+   FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET,
 
    VS_OPCODE_URB_WRITE,
    VS_OPCODE_PULL_CONSTANT_LOAD,
@@ -814,6 +957,14 @@ enum opcode {
     */
    GS_OPCODE_URB_WRITE,
 
+   /**
+    * Write geometry shader output data to the URB and request a new URB
+    * handle (gen6).
+    *
+    * This opcode doesn't do an implied move from R0 to the first MRF.
+    */
+   GS_OPCODE_URB_WRITE_ALLOCATE,
+
    /**
     * Terminate the geometry shader thread by doing an empty URB write.
     *
@@ -850,11 +1001,9 @@ enum opcode {
    GS_OPCODE_SET_VERTEX_COUNT,
 
    /**
-    * Set DWORD 2 of dst to the immediate value in src.  Used by geometry
-    * shaders to initialize DWORD 2 of R0, which needs to be 0 in order for
-    * scratch reads and writes to operate correctly.
+    * Set DWORD 2 of dst to the value in src.
     */
-   GS_OPCODE_SET_DWORD_2_IMMED,
+   GS_OPCODE_SET_DWORD_2,
 
    /**
     * Prepare the dst register for storage in the "Channel Mask" fields of a
@@ -881,42 +1030,195 @@ enum opcode {
     *   form the final channel mask.
     */
    GS_OPCODE_SET_CHANNEL_MASKS,
+
+   /**
+    * Get the "Instance ID" fields from the payload.
+    *
+    * - dst is the GRF for gl_InvocationID.
+    */
+   GS_OPCODE_GET_INSTANCE_ID,
+
+   /**
+    * Send a FF_SYNC message to allocate initial URB handles (gen6).
+    *
+    * - dst will be used as the writeback register for the FF_SYNC operation.
+    *
+    * - src0 is the number of primitives written.
+    *
+    * - src1 is the value to hold in M0.0: number of SO vertices to write
+    *   and number of SO primitives needed. Its value will be overwritten
+    *   with the SVBI values if transform feedback is enabled.
+    *
+    * Note: This opcode uses an implicit MRF register for the ff_sync message
+    * header, so the caller is expected to set inst->base_mrf and initialize
+    * that MRF register to r0. This opcode will also write to this MRF register
+    * to include the allocated URB handle so it can then be reused directly as
+    * the header in the URB write operation we are allocating the handle for.
+    */
+   GS_OPCODE_FF_SYNC,
+
+   /**
+    * Move r0.1 (which holds PrimitiveID information in gen6) to a separate
+    * register.
+    *
+    * - dst is the GRF where PrimitiveID information will be moved.
+    */
+   GS_OPCODE_SET_PRIMITIVE_ID,
+
+   /**
+    * Write transform feedback data to the SVB by sending a SVB WRITE message.
+    * Used in gen6.
+    *
+    * - dst is the MRF register containing the message header.
+    *
+    * - src0 is the register where the vertex data is going to be copied from.
+    *
+    * - src1 is the destination register when write commit occurs.
+    */
+   GS_OPCODE_SVB_WRITE,
+
+   /**
+    * Set destination index in the SVB write message payload (M0.5). Used
+    * in gen6 for transform feedback.
+    *
+    * - dst is the header to save the destination indices for SVB WRITE.
+    * - src is the register that holds the destination indices value.
+    */
+   GS_OPCODE_SVB_SET_DST_INDEX,
+
+   /**
+    * Prepare Mx.0 subregister for being used in the FF_SYNC message header.
+    * Used in gen6 for transform feedback.
+    *
+    * - dst will hold the register with the final Mx.0 value.
+    *
+    * - src0 has the number of vertices emitted in SO (NumSOVertsToWrite)
+    *
+    * - src1 has the number of needed primitives for SO (NumSOPrimsNeeded)
+    *
+    * - src2 is the value to hold in M0: number of SO vertices to write
+    *   and number of SO primitives needed.
+    */
+   GS_OPCODE_FF_SYNC_SET_PRIMITIVES,
+};
+
+enum brw_urb_write_flags {   /* Bit flags: each basic flag below is a distinct
+                              * single bit (0x1 .. 0x40), so flags can be
+                              * combined with bitwise OR.  The last two
+                              * enumerators are such combinations. */
+   BRW_URB_WRITE_NO_FLAGS = 0,
+
+   /**
+    * Causes a new URB entry to be allocated, and its address stored in the
+    * destination register (gen < 7).
+    */
+   BRW_URB_WRITE_ALLOCATE = 0x1,
+
+   /**
+    * Causes the current URB entry to be deallocated (gen < 7).
+    */
+   BRW_URB_WRITE_UNUSED = 0x2,
+
+   /**
+    * Causes the thread to terminate.
+    */
+   BRW_URB_WRITE_EOT = 0x4,
+
+   /**
+    * Indicates that the given URB entry is complete, and may be sent further
+    * down the 3D pipeline (gen < 7).
+    */
+   BRW_URB_WRITE_COMPLETE = 0x8,
+
+   /**
+    * Indicates that an additional offset (which may be different for the two
+    * vec4 slots) is stored in the message header (gen == 7).
+    */
+   BRW_URB_WRITE_PER_SLOT_OFFSET = 0x10,
+
+   /**
+    * Indicates that the channel masks in the URB_WRITE message header should
+    * not be overridden to 0xff (gen == 7).
+    */
+   BRW_URB_WRITE_USE_CHANNEL_MASKS = 0x20,
+
+   /**
+    * Indicates that the data should be sent to the URB using the
+    * URB_WRITE_OWORD message rather than URB_WRITE_HWORD (gen == 7).  This
+    * causes offsets to be interpreted as multiples of an OWORD instead of an
+    * HWORD, and only allows one OWORD to be written.
+    */
+   BRW_URB_WRITE_OWORD = 0x40,
+
+   /**
+    * Convenient combination of flags: end the thread while simultaneously
+    * marking the given URB entry as complete (0x4 | 0x8 = 0xc).
+    */
+   BRW_URB_WRITE_EOT_COMPLETE = BRW_URB_WRITE_EOT | BRW_URB_WRITE_COMPLETE,
+
+   /**
+    * Convenient combination of flags: mark the given URB entry as complete
+    * and simultaneously allocate a new one (0x1 | 0x8 = 0x9).
+    */
+   BRW_URB_WRITE_ALLOCATE_COMPLETE =
+      BRW_URB_WRITE_ALLOCATE | BRW_URB_WRITE_COMPLETE,
 };
 
-#define BRW_PREDICATE_NONE             0
-#define BRW_PREDICATE_NORMAL           1
-#define BRW_PREDICATE_ALIGN1_ANYV             2
-#define BRW_PREDICATE_ALIGN1_ALLV             3
-#define BRW_PREDICATE_ALIGN1_ANY2H            4
-#define BRW_PREDICATE_ALIGN1_ALL2H            5
-#define BRW_PREDICATE_ALIGN1_ANY4H            6
-#define BRW_PREDICATE_ALIGN1_ALL4H            7
-#define BRW_PREDICATE_ALIGN1_ANY8H            8
-#define BRW_PREDICATE_ALIGN1_ALL8H            9
-#define BRW_PREDICATE_ALIGN1_ANY16H           10
-#define BRW_PREDICATE_ALIGN1_ALL16H           11
-#define BRW_PREDICATE_ALIGN16_REPLICATE_X     2
-#define BRW_PREDICATE_ALIGN16_REPLICATE_Y     3
-#define BRW_PREDICATE_ALIGN16_REPLICATE_Z     4
-#define BRW_PREDICATE_ALIGN16_REPLICATE_W     5
-#define BRW_PREDICATE_ALIGN16_ANY4H           6
-#define BRW_PREDICATE_ALIGN16_ALL4H           7
+#ifdef __cplusplus
+/**
+ * Allow brw_urb_write_flags enums to be ORed together.
+ *
+ * In C++ the built-in | on two enum values produces an integer, which does
+ * not convert back to the enum type implicitly.  This overload casts both
+ * operands to int, ORs them, and casts the result back to
+ * brw_urb_write_flags.  It is only compiled for C++ translation units; C
+ * code sees nothing from this block.
+ */
+inline brw_urb_write_flags
+operator|(brw_urb_write_flags x, brw_urb_write_flags y)
+{
+   return static_cast<brw_urb_write_flags>(static_cast<int>(x) |
+                                           static_cast<int>(y));
+}
+#endif
+
+enum PACKED brw_predicate {   /* Predicate control encodings.  The ALIGN1_* and
+                               * ALIGN16_* names share numeric values (2..7),
+                               * so a value alone is ambiguous; presumably the
+                               * instruction's access mode selects which set
+                               * applies -- confirm against users. */
+   BRW_PREDICATE_NONE                =  0,
+   BRW_PREDICATE_NORMAL              =  1,
+   BRW_PREDICATE_ALIGN1_ANYV         =  2,
+   BRW_PREDICATE_ALIGN1_ALLV         =  3,
+   BRW_PREDICATE_ALIGN1_ANY2H        =  4,
+   BRW_PREDICATE_ALIGN1_ALL2H        =  5,
+   BRW_PREDICATE_ALIGN1_ANY4H        =  6,
+   BRW_PREDICATE_ALIGN1_ALL4H        =  7,
+   BRW_PREDICATE_ALIGN1_ANY8H        =  8,
+   BRW_PREDICATE_ALIGN1_ALL8H        =  9,
+   BRW_PREDICATE_ALIGN1_ANY16H       = 10,
+   BRW_PREDICATE_ALIGN1_ALL16H       = 11,
+   BRW_PREDICATE_ALIGN1_ANY32H       = 12,
+   BRW_PREDICATE_ALIGN1_ALL32H       = 13,
+   BRW_PREDICATE_ALIGN16_REPLICATE_X =  2,
+   BRW_PREDICATE_ALIGN16_REPLICATE_Y =  3,
+   BRW_PREDICATE_ALIGN16_REPLICATE_Z =  4,
+   BRW_PREDICATE_ALIGN16_REPLICATE_W =  5,
+   BRW_PREDICATE_ALIGN16_ANY4H       =  6,
+   BRW_PREDICATE_ALIGN16_ALL4H       =  7,
+};
 
 #define BRW_ARCHITECTURE_REGISTER_FILE    0
 #define BRW_GENERAL_REGISTER_FILE         1
 #define BRW_MESSAGE_REGISTER_FILE         2
 #define BRW_IMMEDIATE_VALUE               3
 
-#define BRW_REGISTER_TYPE_UD  0
-#define BRW_REGISTER_TYPE_D   1
-#define BRW_REGISTER_TYPE_UW  2
-#define BRW_REGISTER_TYPE_W   3
-#define BRW_REGISTER_TYPE_UB  4
-#define BRW_REGISTER_TYPE_B   5
-#define BRW_REGISTER_TYPE_VF  5        /* packed float vector, immediates only? */
-#define BRW_REGISTER_TYPE_HF  6
-#define BRW_REGISTER_TYPE_V   6        /* packed int vector, immediates only, uword dest only */
-#define BRW_REGISTER_TYPE_F   7
+#define BRW_HW_REG_TYPE_UD  0
+#define BRW_HW_REG_TYPE_D   1
+#define BRW_HW_REG_TYPE_UW  2
+#define BRW_HW_REG_TYPE_W   3
+#define BRW_HW_REG_TYPE_F   7
+#define GEN8_HW_REG_TYPE_UQ 8
+#define GEN8_HW_REG_TYPE_Q  9
+
+#define BRW_HW_REG_NON_IMM_TYPE_UB  4
+#define BRW_HW_REG_NON_IMM_TYPE_B   5
+#define GEN7_HW_REG_NON_IMM_TYPE_DF 6
+#define GEN8_HW_REG_NON_IMM_TYPE_HF 10
+
+#define BRW_HW_REG_IMM_TYPE_UV  4 /* Gen6+ packed unsigned immediate vector */
+#define BRW_HW_REG_IMM_TYPE_VF  5 /* packed float immediate vector */
+#define BRW_HW_REG_IMM_TYPE_V   6 /* packed int imm. vector; uword dest only */
+#define GEN8_HW_REG_IMM_TYPE_DF 10
+#define GEN8_HW_REG_IMM_TYPE_HF 11
 
 /* SNB adds 3-src instructions (MAD and LRP) that only operate on floats, so
  * the types were implied. IVB adds BFE and BFI2 that operate on doublewords
@@ -956,23 +1258,24 @@ enum opcode {
 #define BRW_THREAD_ATOMIC     1
 #define BRW_THREAD_SWITCH     2
 
-#define BRW_VERTICAL_STRIDE_0                 0
-#define BRW_VERTICAL_STRIDE_1                 1
-#define BRW_VERTICAL_STRIDE_2                 2
-#define BRW_VERTICAL_STRIDE_4                 3
-#define BRW_VERTICAL_STRIDE_8                 4
-#define BRW_VERTICAL_STRIDE_16                5
-#define BRW_VERTICAL_STRIDE_32                6
-#define BRW_VERTICAL_STRIDE_64                7
-#define BRW_VERTICAL_STRIDE_128               8
-#define BRW_VERTICAL_STRIDE_256               9
-#define BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL   0xF
-
-#define BRW_WIDTH_1       0
-#define BRW_WIDTH_2       1
-#define BRW_WIDTH_4       2
-#define BRW_WIDTH_8       3
-#define BRW_WIDTH_16      4
+enum PACKED brw_vertical_stride {   /* Encoded, not literal, strides: 0 means
+                                     * stride 0; otherwise the value is
+                                     * log2(stride) + 1.  ONE_DIMENSIONAL is
+                                     * a special out-of-sequence value. */
+   BRW_VERTICAL_STRIDE_0               = 0,
+   BRW_VERTICAL_STRIDE_1               = 1,
+   BRW_VERTICAL_STRIDE_2               = 2,
+   BRW_VERTICAL_STRIDE_4               = 3,
+   BRW_VERTICAL_STRIDE_8               = 4,
+   BRW_VERTICAL_STRIDE_16              = 5,
+   BRW_VERTICAL_STRIDE_32              = 6,
+   BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL = 0xF,
+};
+
+enum PACKED brw_width {   /* Each value is log2 of the width named by the
+                           * enumerator (WIDTH_1 = 0 ... WIDTH_16 = 4). */
+   BRW_WIDTH_1  = 0,
+   BRW_WIDTH_2  = 1,
+   BRW_WIDTH_4  = 2,
+   BRW_WIDTH_8  = 3,
+   BRW_WIDTH_16 = 4,
+};
 
 #define BRW_STATELESS_BUFFER_BOUNDARY_1K      0
 #define BRW_STATELESS_BUFFER_BOUNDARY_2K      1
@@ -1008,13 +1311,16 @@ enum brw_message_target {
    BRW_SFID_DATAPORT_WRITE           = 5,
    BRW_SFID_URB                      = 6,
    BRW_SFID_THREAD_SPAWNER           = 7,
+   BRW_SFID_VME                      = 8,
 
    GEN6_SFID_DATAPORT_SAMPLER_CACHE  = 4,
    GEN6_SFID_DATAPORT_RENDER_CACHE   = 5,
    GEN6_SFID_DATAPORT_CONSTANT_CACHE = 9,
 
    GEN7_SFID_DATAPORT_DATA_CACHE     = 10,
+   GEN7_SFID_PIXEL_INTERPOLATOR      = 11,
    HSW_SFID_DATAPORT_DATA_CACHE_1    = 12,
+   HSW_SFID_CRE                      = 13,
 };
 
 #define GEN7_MESSAGE_TARGET_DP_DATA_CACHE     10
@@ -1148,6 +1454,11 @@ enum brw_message_target {
                                                                (1 << 17))
 #define GEN7_DATAPORT_SCRATCH_NUM_REGS_SHIFT                        12
 
+#define GEN7_PIXEL_INTERPOLATOR_LOC_SHARED_OFFSET     0
+#define GEN7_PIXEL_INTERPOLATOR_LOC_SAMPLE            1
+#define GEN7_PIXEL_INTERPOLATOR_LOC_CENTROID          2
+#define GEN7_PIXEL_INTERPOLATOR_LOC_PER_SLOT_OFFSET   3
+
 /* HSW */
 #define HSW_DATAPORT_DC_PORT0_OWORD_BLOCK_READ                      0
 #define HSW_DATAPORT_DC_PORT0_UNALIGNED_OWORD_BLOCK_READ            1
@@ -1203,6 +1514,8 @@ enum brw_message_target {
 #define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER   11
 #define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT                 12
 #define BRW_MATH_FUNCTION_INT_DIV_REMAINDER                13
+#define GEN8_MATH_FUNCTION_INVM                            14
+#define GEN8_MATH_FUNCTION_RSQRTM                          15
 
 #define BRW_MATH_INTEGER_UNSIGNED     0
 #define BRW_MATH_INTEGER_SIGNED       1
@@ -1218,6 +1531,7 @@ enum brw_message_target {
 
 #define BRW_URB_OPCODE_WRITE_HWORD  0
 #define BRW_URB_OPCODE_WRITE_OWORD  1
+#define GEN8_URB_OPCODE_SIMD8_WRITE  7
 
 #define BRW_URB_SWIZZLE_NONE          0
 #define BRW_URB_SWIZZLE_INTERLEAVE    1
@@ -1318,6 +1632,21 @@ enum brw_message_target {
 #define _3DSTATE_VF                             0x780c /* GEN7.5+ */
 #define HSW_CUT_INDEX_ENABLE                            (1 << 8)
 
+#define _3DSTATE_VF_INSTANCING                  0x7849 /* GEN8+ */
+# define GEN8_VF_INSTANCING_ENABLE                      (1 << 8)
+
+#define _3DSTATE_VF_SGVS                        0x784a /* GEN8+ */
+# define GEN8_SGVS_ENABLE_INSTANCE_ID                   (1 << 31)
+# define GEN8_SGVS_INSTANCE_ID_COMPONENT_SHIFT          29
+# define GEN8_SGVS_INSTANCE_ID_ELEMENT_OFFSET_SHIFT     16
+# define GEN8_SGVS_ENABLE_VERTEX_ID                     (1 << 15)
+# define GEN8_SGVS_VERTEX_ID_COMPONENT_SHIFT            13
+# define GEN8_SGVS_VERTEX_ID_ELEMENT_OFFSET_SHIFT       0
+
+#define _3DSTATE_VF_TOPOLOGY                    0x784b /* GEN8+ */
+
+#define _3DSTATE_WM_CHROMAKEY                  0x784c /* GEN8+ */
+
 #define _3DSTATE_URB_VS                         0x7830 /* GEN7+ */
 #define _3DSTATE_URB_HS                         0x7831 /* GEN7+ */
 #define _3DSTATE_URB_DS                         0x7832 /* GEN7+ */
@@ -1325,10 +1654,14 @@ enum brw_message_target {
 # define GEN7_URB_ENTRY_SIZE_SHIFT                      16
 # define GEN7_URB_STARTING_ADDRESS_SHIFT                25
 
-/* "GS URB Entry Allocation Size" is a U9-1 field, so the maximum gs_size
+/* Gen7 "GS URB Entry Allocation Size" is a U9-1 field, so the maximum gs_size
  * is 2^9, or 512.  It's counted in multiples of 64 bytes.
  */
-#define GEN7_MAX_GS_URB_ENTRY_SIZE_BYTES               (512*64)
+#define GEN7_MAX_GS_URB_ENTRY_SIZE_BYTES                (512*64)
+/* Gen6 "GS URB Entry Allocation Size" is defined as a number of 1024-bit
+ * (128 bytes) URB rows and the maximum allowed value is 5 rows.
+ */
+#define GEN6_MAX_GS_URB_ENTRY_SIZE_BYTES                (5*128)
 
 #define _3DSTATE_PUSH_CONSTANT_ALLOC_VS         0x7912 /* GEN7+ */
 #define _3DSTATE_PUSH_CONSTANT_ALLOC_GS         0x7915 /* GEN7+ */
@@ -1339,6 +1672,7 @@ enum brw_message_target {
 # define GEN6_CC_VIEWPORT_MODIFY                       (1 << 12)
 # define GEN6_SF_VIEWPORT_MODIFY                       (1 << 11)
 # define GEN6_CLIP_VIEWPORT_MODIFY                     (1 << 10)
+# define GEN7_NUM_VIEWPORTS                            16
 
 #define _3DSTATE_VIEWPORT_STATE_POINTERS_CC    0x7823 /* GEN7+ */
 #define _3DSTATE_VIEWPORT_STATE_POINTERS_SF_CL 0x7821 /* GEN7+ */
@@ -1363,6 +1697,12 @@ enum brw_message_target {
 # define GEN6_VS_STATISTICS_ENABLE                     (1 << 10)
 # define GEN6_VS_CACHE_DISABLE                         (1 << 1)
 # define GEN6_VS_ENABLE                                        (1 << 0)
+/* Gen8+ DW7 */
+# define GEN8_VS_SIMD8_ENABLE                           (1 << 2)
+/* Gen8+ DW8 */
+# define GEN8_VS_URB_ENTRY_OUTPUT_OFFSET_SHIFT          21
+# define GEN8_VS_URB_OUTPUT_LENGTH_SHIFT                16
+# define GEN8_VS_USER_CLIP_DISTANCE_SHIFT               8
 
 #define _3DSTATE_GS                            0x7811 /* GEN6+ */
 /* DW2 */
@@ -1386,6 +1726,7 @@ enum brw_message_target {
 # define GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_CUT         0
 # define GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID         1
 # define GEN7_GS_CONTROL_DATA_HEADER_SIZE_SHIFT                20
+# define GEN7_GS_INSTANCE_CONTROL_SHIFT                        15
 # define GEN7_GS_DISPATCH_MODE_SINGLE                  (0 << 11)
 # define GEN7_GS_DISPATCH_MODE_DUAL_INSTANCE           (1 << 11)
 # define GEN7_GS_DISPATCH_MODE_DUAL_OBJECT             (2 << 11)
@@ -1405,9 +1746,19 @@ enum brw_message_target {
 # define GEN6_GS_SVBI_POSTINCREMENT_VALUE_MASK         INTEL_MASK(25, 16)
 # define GEN6_GS_ENABLE                                        (1 << 15)
 
+/* Gen8+ DW9 */
+# define GEN8_GS_URB_ENTRY_OUTPUT_OFFSET_SHIFT          21
+# define GEN8_GS_URB_OUTPUT_LENGTH_SHIFT                16
+# define GEN8_GS_USER_CLIP_DISTANCE_SHIFT               8
+
 # define BRW_GS_EDGE_INDICATOR_0                       (1 << 8)
 # define BRW_GS_EDGE_INDICATOR_1                       (1 << 9)
 
+/* GS Thread Payload
+ */
+/* R0 */
+# define GEN7_GS_PAYLOAD_INSTANCE_ID_SHIFT             27
+
 /* 3DSTATE_GS "Output Vertex Size" has an effective maximum of 62.  It's
  * counted in multiples of 16 bytes.
  */
@@ -1455,6 +1806,7 @@ enum brw_message_target {
 # define GEN6_CLIP_MIN_POINT_WIDTH_SHIFT               17
 # define GEN6_CLIP_MAX_POINT_WIDTH_SHIFT               6
 # define GEN6_CLIP_FORCE_ZERO_RTAINDEX                 (1 << 5)
+# define GEN6_CLIP_MAX_VP_INDEX_MASK                   INTEL_MASK(3, 0)
 
 #define _3DSTATE_SF                            0x7813 /* GEN6+ */
 /* DW1 (for gen6) */
@@ -1462,6 +1814,7 @@ enum brw_message_target {
 # define GEN6_SF_SWIZZLE_ENABLE                                (1 << 21)
 # define GEN6_SF_POINT_SPRITE_UPPERLEFT                        (0 << 20)
 # define GEN6_SF_POINT_SPRITE_LOWERLEFT                        (1 << 20)
+# define GEN9_SF_LINE_WIDTH_SHIFT                      12 /* U11.7 */
 # define GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT           11
 # define GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT           4
 /* DW2 */
@@ -1545,22 +1898,189 @@ enum brw_message_target {
 /* GEN7/DW1: */
 # define GEN7_SF_DEPTH_BUFFER_SURFACE_FORMAT_SHIFT     12
 /* GEN7/DW2: */
-# define HSW_SF_LINE_STIPPLE_ENABLE                    14
+# define HSW_SF_LINE_STIPPLE_ENABLE                    (1 << 14)
+
+# define GEN8_SF_SMOOTH_POINT_ENABLE                    (1 << 13)
 
 #define _3DSTATE_SBE                           0x781F /* GEN7+ */
 /* DW1 */
+# define GEN8_SBE_FORCE_URB_ENTRY_READ_LENGTH           (1 << 29)
+# define GEN8_SBE_FORCE_URB_ENTRY_READ_OFFSET           (1 << 28) /* same bit as GEN7_SBE_SWIZZLE_CONTROL_MODE */
 # define GEN7_SBE_SWIZZLE_CONTROL_MODE                 (1 << 28)
 # define GEN7_SBE_NUM_OUTPUTS_SHIFT                    22
 # define GEN7_SBE_SWIZZLE_ENABLE                       (1 << 21)
 # define GEN7_SBE_POINT_SPRITE_LOWERLEFT               (1 << 20)
 # define GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT          11
 # define GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT          4
+# define GEN8_SBE_URB_ENTRY_READ_OFFSET_SHIFT          5 /* differs from the GEN7 shift of 4 */
 /* DW2-9: Attribute setup (same as DW8-15 of gen6 _3DSTATE_SF) */
 /* DW10: Point sprite texture coordinate enables */
 /* DW11: Constant interpolation enables */
 /* DW12: attr 0-7 wrap shortest enables */
 /* DW13: attr 8-15 wrap shortest enables */
 
+/* DW4-5: Attribute active components (gen9) */
+#define GEN9_SBE_ACTIVE_COMPONENT_NONE                 0
+#define GEN9_SBE_ACTIVE_COMPONENT_XY                   1
+#define GEN9_SBE_ACTIVE_COMPONENT_XYZ                  2
+#define GEN9_SBE_ACTIVE_COMPONENT_XYZW                 3
+
+#define _3DSTATE_SBE_SWIZ                       0x7851 /* GEN8+ */
+
+#define _3DSTATE_RASTER                         0x7850 /* GEN8+ */
+/* DW1 */
+# define GEN9_RASTER_VIEWPORT_Z_FAR_CLIP_TEST_ENABLE    (1 << 26)
+# define GEN8_RASTER_FRONT_WINDING_CCW                  (1 << 21)
+# define GEN8_RASTER_CULL_BOTH                          (0 << 16)
+# define GEN8_RASTER_CULL_NONE                          (1 << 16)
+# define GEN8_RASTER_CULL_FRONT                         (2 << 16)
+# define GEN8_RASTER_CULL_BACK                          (3 << 16)
+# define GEN8_RASTER_SMOOTH_POINT_ENABLE                (1 << 13)
+# define GEN8_RASTER_API_MULTISAMPLE_ENABLE             (1 << 12)
+# define GEN8_RASTER_LINE_AA_ENABLE                     (1 << 2)
+# define GEN8_RASTER_SCISSOR_ENABLE                     (1 << 1)
+# define GEN8_RASTER_VIEWPORT_Z_CLIP_TEST_ENABLE        (1 << 0) /* bit 0 is shared with the GEN9 near-clip define below */
+# define GEN9_RASTER_VIEWPORT_Z_NEAR_CLIP_TEST_ENABLE   (1 << 0) /* same bit as the GEN8 Z clip test enable above */
+
+/* Gen8 BLEND_STATE */
+/* DW0 */
+#define GEN8_BLEND_ALPHA_TO_COVERAGE_ENABLE             (1u << 31) /* 1u: bit 31 does not fit in a signed int */
+#define GEN8_BLEND_INDEPENDENT_ALPHA_BLEND_ENABLE       (1 << 30)
+#define GEN8_BLEND_ALPHA_TO_ONE_ENABLE                  (1 << 29)
+#define GEN8_BLEND_ALPHA_TO_COVERAGE_DITHER_ENABLE      (1 << 28)
+#define GEN8_BLEND_ALPHA_TEST_ENABLE                    (1 << 27)
+#define GEN8_BLEND_ALPHA_TEST_FUNCTION_MASK             INTEL_MASK(26, 24)
+#define GEN8_BLEND_ALPHA_TEST_FUNCTION_SHIFT            24
+#define GEN8_BLEND_COLOR_DITHER_ENABLE                  (1 << 23)
+#define GEN8_BLEND_X_DITHER_OFFSET_MASK                 INTEL_MASK(22, 21)
+#define GEN8_BLEND_X_DITHER_OFFSET_SHIFT                21
+#define GEN8_BLEND_Y_DITHER_OFFSET_MASK                 INTEL_MASK(20, 19)
+#define GEN8_BLEND_Y_DITHER_OFFSET_SHIFT                19
+/* DW1 + 2n */
+#define GEN8_BLEND_COLOR_BUFFER_BLEND_ENABLE            (1u << 31) /* 1u: bit 31 does not fit in a signed int */
+#define GEN8_BLEND_SRC_BLEND_FACTOR_MASK                INTEL_MASK(30, 26)
+#define GEN8_BLEND_SRC_BLEND_FACTOR_SHIFT               26
+#define GEN8_BLEND_DST_BLEND_FACTOR_MASK                INTEL_MASK(25, 21)
+#define GEN8_BLEND_DST_BLEND_FACTOR_SHIFT               21
+#define GEN8_BLEND_COLOR_BLEND_FUNCTION_MASK            INTEL_MASK(20, 18)
+#define GEN8_BLEND_COLOR_BLEND_FUNCTION_SHIFT           18
+#define GEN8_BLEND_SRC_ALPHA_BLEND_FACTOR_MASK          INTEL_MASK(17, 13)
+#define GEN8_BLEND_SRC_ALPHA_BLEND_FACTOR_SHIFT         13
+#define GEN8_BLEND_DST_ALPHA_BLEND_FACTOR_MASK          INTEL_MASK(12, 8)
+#define GEN8_BLEND_DST_ALPHA_BLEND_FACTOR_SHIFT         8
+#define GEN8_BLEND_ALPHA_BLEND_FUNCTION_MASK            INTEL_MASK(7, 5)
+#define GEN8_BLEND_ALPHA_BLEND_FUNCTION_SHIFT           5
+#define GEN8_BLEND_WRITE_DISABLE_ALPHA                  (1 << 3)
+#define GEN8_BLEND_WRITE_DISABLE_RED                    (1 << 2)
+#define GEN8_BLEND_WRITE_DISABLE_GREEN                  (1 << 1)
+#define GEN8_BLEND_WRITE_DISABLE_BLUE                   (1 << 0)
+/* DW1 + 2n + 1 */
+#define GEN8_BLEND_LOGIC_OP_ENABLE                      (1u << 31) /* 1u: bit 31 does not fit in a signed int */
+#define GEN8_BLEND_LOGIC_OP_FUNCTION_MASK               INTEL_MASK(30, 27)
+#define GEN8_BLEND_LOGIC_OP_FUNCTION_SHIFT              27
+#define GEN8_BLEND_PRE_BLEND_SRC_ONLY_CLAMP_ENABLE      (1 << 4)
+#define GEN8_BLEND_COLOR_CLAMP_RANGE_RTFORMAT           (2 << 2)
+#define GEN8_BLEND_PRE_BLEND_COLOR_CLAMP_ENABLE         (1 << 1)
+#define GEN8_BLEND_POST_BLEND_COLOR_CLAMP_ENABLE        (1 << 0)
+
+#define _3DSTATE_WM_HZ_OP                       0x7852 /* GEN8+ */
+/* DW1 */
+# define GEN8_WM_HZ_STENCIL_CLEAR                       (1u << 31) /* 1u: bit 31 does not fit in a signed int */
+# define GEN8_WM_HZ_DEPTH_CLEAR                         (1 << 30)
+# define GEN8_WM_HZ_DEPTH_RESOLVE                       (1 << 28)
+# define GEN8_WM_HZ_HIZ_RESOLVE                         (1 << 27)
+# define GEN8_WM_HZ_PIXEL_OFFSET_ENABLE                 (1 << 26)
+# define GEN8_WM_HZ_FULL_SURFACE_DEPTH_CLEAR            (1 << 25)
+# define GEN8_WM_HZ_STENCIL_CLEAR_VALUE_MASK            INTEL_MASK(23, 16)
+# define GEN8_WM_HZ_STENCIL_CLEAR_VALUE_SHIFT           16
+# define GEN8_WM_HZ_NUM_SAMPLES_MASK                    INTEL_MASK(15, 13)
+# define GEN8_WM_HZ_NUM_SAMPLES_SHIFT                   13
+/* DW2: clear rectangle minimum corner (Y in bits 31:16, X in bits 15:0) */
+# define GEN8_WM_HZ_CLEAR_RECTANGLE_Y_MIN_MASK          INTEL_MASK(31, 16)
+# define GEN8_WM_HZ_CLEAR_RECTANGLE_Y_MIN_SHIFT         16
+# define GEN8_WM_HZ_CLEAR_RECTANGLE_X_MIN_MASK          INTEL_MASK(15, 0)
+# define GEN8_WM_HZ_CLEAR_RECTANGLE_X_MIN_SHIFT         0
+/* DW3: clear rectangle maximum corner (Y in bits 31:16, X in bits 15:0) */
+# define GEN8_WM_HZ_CLEAR_RECTANGLE_Y_MAX_MASK          INTEL_MASK(31, 16)
+# define GEN8_WM_HZ_CLEAR_RECTANGLE_Y_MAX_SHIFT         16
+# define GEN8_WM_HZ_CLEAR_RECTANGLE_X_MAX_MASK          INTEL_MASK(15, 0)
+# define GEN8_WM_HZ_CLEAR_RECTANGLE_X_MAX_SHIFT         0
+/* DW4: sample mask in bits 15:0 */
+# define GEN8_WM_HZ_SAMPLE_MASK_MASK                    INTEL_MASK(15, 0)
+# define GEN8_WM_HZ_SAMPLE_MASK_SHIFT                   0
+
+
+#define _3DSTATE_PS_BLEND                       0x784D /* GEN8+ */
+/* DW1 */
+# define GEN8_PS_BLEND_ALPHA_TO_COVERAGE_ENABLE         (1u << 31) /* 1u: bit 31 does not fit in a signed int */
+# define GEN8_PS_BLEND_HAS_WRITEABLE_RT                 (1 << 30)
+# define GEN8_PS_BLEND_COLOR_BUFFER_BLEND_ENABLE        (1 << 29)
+# define GEN8_PS_BLEND_SRC_ALPHA_BLEND_FACTOR_MASK      INTEL_MASK(28, 24)
+# define GEN8_PS_BLEND_SRC_ALPHA_BLEND_FACTOR_SHIFT     24
+# define GEN8_PS_BLEND_DST_ALPHA_BLEND_FACTOR_MASK      INTEL_MASK(23, 19)
+# define GEN8_PS_BLEND_DST_ALPHA_BLEND_FACTOR_SHIFT     19
+# define GEN8_PS_BLEND_SRC_BLEND_FACTOR_MASK            INTEL_MASK(18, 14)
+# define GEN8_PS_BLEND_SRC_BLEND_FACTOR_SHIFT           14
+# define GEN8_PS_BLEND_DST_BLEND_FACTOR_MASK            INTEL_MASK(13, 9)
+# define GEN8_PS_BLEND_DST_BLEND_FACTOR_SHIFT           9
+# define GEN8_PS_BLEND_ALPHA_TEST_ENABLE                (1 << 8)
+# define GEN8_PS_BLEND_INDEPENDENT_ALPHA_BLEND_ENABLE   (1 << 7)
+
+#define _3DSTATE_WM_DEPTH_STENCIL               0x784E /* GEN8+ */
+/* DW1: op/function field shifts (bits 31:5) and single-bit enables (bits 4:0) */
+# define GEN8_WM_DS_STENCIL_FAIL_OP_SHIFT               29
+# define GEN8_WM_DS_Z_FAIL_OP_SHIFT                     26
+# define GEN8_WM_DS_Z_PASS_OP_SHIFT                     23
+# define GEN8_WM_DS_BF_STENCIL_FUNC_SHIFT               20
+# define GEN8_WM_DS_BF_STENCIL_FAIL_OP_SHIFT            17
+# define GEN8_WM_DS_BF_Z_FAIL_OP_SHIFT                  14
+# define GEN8_WM_DS_BF_Z_PASS_OP_SHIFT                  11
+# define GEN8_WM_DS_STENCIL_FUNC_SHIFT                  8
+# define GEN8_WM_DS_DEPTH_FUNC_SHIFT                    5
+# define GEN8_WM_DS_DOUBLE_SIDED_STENCIL_ENABLE         (1 << 4)
+# define GEN8_WM_DS_STENCIL_TEST_ENABLE                 (1 << 3)
+# define GEN8_WM_DS_STENCIL_BUFFER_WRITE_ENABLE         (1 << 2)
+# define GEN8_WM_DS_DEPTH_TEST_ENABLE                   (1 << 1)
+# define GEN8_WM_DS_DEPTH_BUFFER_WRITE_ENABLE           (1 << 0)
+/* DW2: four 8-bit stencil test/write masks (BF presumably = back face; confirm) */
+# define GEN8_WM_DS_STENCIL_TEST_MASK_MASK              INTEL_MASK(31, 24)
+# define GEN8_WM_DS_STENCIL_TEST_MASK_SHIFT             24
+# define GEN8_WM_DS_STENCIL_WRITE_MASK_MASK             INTEL_MASK(23, 16)
+# define GEN8_WM_DS_STENCIL_WRITE_MASK_SHIFT            16
+# define GEN8_WM_DS_BF_STENCIL_TEST_MASK_MASK           INTEL_MASK(15, 8)
+# define GEN8_WM_DS_BF_STENCIL_TEST_MASK_SHIFT          8
+# define GEN8_WM_DS_BF_STENCIL_WRITE_MASK_MASK          INTEL_MASK(7, 0)
+# define GEN8_WM_DS_BF_STENCIL_WRITE_MASK_SHIFT         0
+/* DW3: two 8-bit stencil reference values; only GEN9-prefixed names are defined here */
+# define GEN9_WM_DS_STENCIL_REF_MASK                    INTEL_MASK(15, 8)
+# define GEN9_WM_DS_STENCIL_REF_SHIFT                   8
+# define GEN9_WM_DS_BF_STENCIL_REF_MASK                 INTEL_MASK(7, 0)
+# define GEN9_WM_DS_BF_STENCIL_REF_SHIFT                0
+
+enum brw_pixel_shader_computed_depth_mode { /* presumably shifted by a *_COMPUTED_DEPTH_MODE_SHIFT; confirm at callers */
+   BRW_PSCDEPTH_OFF   = 0, /* PS does not compute depth */
+   BRW_PSCDEPTH_ON    = 1, /* PS computes depth; no guarantee about value */
+   BRW_PSCDEPTH_ON_GE = 2, /* PS guarantees output depth >= source depth */
+   BRW_PSCDEPTH_ON_LE = 3, /* PS guarantees output depth <= source depth */
+};
+
+#define _3DSTATE_PS_EXTRA                       0x784F /* GEN8+ */
+/* DW1 */
+# define GEN8_PSX_PIXEL_SHADER_VALID                    (1u << 31) /* 1u: bit 31 does not fit in a signed int */
+# define GEN8_PSX_PIXEL_SHADER_NO_RT_WRITE              (1 << 30)
+# define GEN8_PSX_OMASK_TO_RENDER_TARGET                (1 << 29)
+# define GEN8_PSX_KILL_ENABLE                           (1 << 28)
+# define GEN8_PSX_COMPUTED_DEPTH_MODE_SHIFT             26
+# define GEN8_PSX_FORCE_COMPUTED_DEPTH                  (1 << 25)
+# define GEN8_PSX_USES_SOURCE_DEPTH                     (1 << 24)
+# define GEN8_PSX_USES_SOURCE_W                         (1 << 23)
+# define GEN8_PSX_ATTRIBUTE_ENABLE                      (1 << 8)
+# define GEN8_PSX_SHADER_DISABLES_ALPHA_TO_COVERAGE     (1 << 7)
+# define GEN8_PSX_SHADER_IS_PER_SAMPLE                  (1 << 6)
+# define GEN8_PSX_SHADER_COMPUTES_STENCIL               (1 << 5)
+# define GEN8_PSX_SHADER_HAS_UAV                        (1 << 2)
+# define GEN8_PSX_SHADER_USES_INPUT_COVERAGE_MASK       (1 << 1)
+
 enum brw_wm_barycentric_interp_mode {
    BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC                = 0,
    BRW_WM_PERSPECTIVE_CENTROID_BARYCENTRIC     = 1,
@@ -1692,10 +2212,7 @@ enum brw_wm_barycentric_interp_mode {
 # define GEN7_WM_DEPTH_RESOLVE                         (1 << 28)
 # define GEN7_WM_HIERARCHICAL_DEPTH_RESOLVE            (1 << 27)
 # define GEN7_WM_KILL_ENABLE                           (1 << 25)
-# define GEN7_WM_PSCDEPTH_OFF                          (0 << 23)
-# define GEN7_WM_PSCDEPTH_ON                           (1 << 23)
-# define GEN7_WM_PSCDEPTH_ON_GE                                (2 << 23)
-# define GEN7_WM_PSCDEPTH_ON_LE                                (3 << 23)
+# define GEN7_WM_COMPUTED_DEPTH_MODE_SHIFT              23
 # define GEN7_WM_USES_SOURCE_DEPTH                     (1 << 20)
 # define GEN7_WM_USES_SOURCE_W                         (1 << 19)
 # define GEN7_WM_POSITION_ZW_PIXEL                     (0 << 17)
@@ -1776,12 +2293,17 @@ enum brw_wm_barycentric_interp_mode {
 /* DW3: SVB maximum index */
 
 #define _3DSTATE_MULTISAMPLE                   0x790d /* GEN6+ */
+#define GEN8_3DSTATE_MULTISAMPLE               0x780d /* GEN8+ */
 /* DW1 */
 # define MS_PIXEL_LOCATION_CENTER                      (0 << 4)
 # define MS_PIXEL_LOCATION_UPPER_LEFT                  (1 << 4)
 # define MS_NUMSAMPLES_1                               (0 << 1)
+# define MS_NUMSAMPLES_2                               (1 << 1)
 # define MS_NUMSAMPLES_4                               (2 << 1)
 # define MS_NUMSAMPLES_8                               (3 << 1)
+# define MS_NUMSAMPLES_16                              (4 << 1) /* field value is log2(sample count), placed at bit 1 */
+
+#define _3DSTATE_SAMPLE_PATTERN                 0x791c /* NOTE(review): no generation tag; presumably GEN8+, confirm */
 
 #define _3DSTATE_STENCIL_BUFFER                        0x790e /* ILK, SNB */
 #define _3DSTATE_HIER_DEPTH_BUFFER             0x790f /* ILK, SNB */
@@ -1829,17 +2351,23 @@ enum brw_wm_barycentric_interp_mode {
 
 #define _3DSTATE_SO_BUFFER                    0x7918 /* GEN7+ */
 /* DW1 */
+# define GEN8_SO_BUFFER_ENABLE                          (1 << 31)
 # define SO_BUFFER_INDEX_SHIFT                         29
 # define SO_BUFFER_INDEX_MASK                          INTEL_MASK(30, 29)
+# define GEN8_SO_BUFFER_OFFSET_WRITE_ENABLE             (1 << 21)
+# define GEN8_SO_BUFFER_OFFSET_ADDRESS_ENABLE           (1 << 20)
 # define SO_BUFFER_PITCH_SHIFT                         0
 # define SO_BUFFER_PITCH_MASK                          INTEL_MASK(11, 0)
 /* DW2: start address */
 /* DW3: end address. */
 
-#define CMD_PIPE_CONTROL              0x7a00
-
 #define CMD_MI_FLUSH                  0x0200
 
+# define BLT_X_SHIFT                                   0 /* X occupies bits 15:0 */
+# define BLT_X_MASK                                    INTEL_MASK(15, 0)
+# define BLT_Y_SHIFT                                   16 /* Y occupies bits 31:16 */
+# define BLT_Y_MASK                                    INTEL_MASK(31, 16)
+
 #define GEN5_MI_REPORT_PERF_COUNT ((0x26 << 23) | (3 - 2))
 /* DW0 */
 # define GEN5_MI_COUNTER_SET_0      (0 << 6)
@@ -1887,6 +2415,19 @@ enum brw_wm_barycentric_interp_mode {
 #define HSW_MOCS_WB_LLC_WB_ELLC         (2 << 1)
 #define HSW_MOCS_UC_LLC_WB_ELLC         (3 << 1)
 
-#include "intel_chipset.h"
+/* Broadwell: these defines always use all available caches (L3, LLC, eLLC),
+ * and let you force write-back (WB) or write-through (WT) caching, or leave
+ * it up to the page table entry (PTE) specified by the kernel.
+ */
+#define BDW_MOCS_WB  0x78
+#define BDW_MOCS_WT  0x58
+#define BDW_MOCS_PTE 0x18
+
+/* Skylake: MOCS is now an index into an array of 64 different configurable
+ * cache settings.  We still use only write-back or write-through, and rely
+ * on the documented default values.
+ */
+#define SKL_MOCS_WB 9
+#define SKL_MOCS_WT 5
 
 #endif