* Keith Whitwell <keithw@vmware.com>
*/
-#define INTEL_MASK(high, low) (((1<<((high)-(low)+1))-1)<<(low))
+/* Build an unsigned mask with bits [high:low] (inclusive) set.
+ *
+ * The mask is derived by shifting an all-ones word right instead of
+ * computing (1u << width) - 1, because the latter shifts by the full width
+ * of the type (undefined behaviour) for a 32-bit-wide field such as
+ * INTEL_MASK(31, 0).
+ *
+ * Requires 0 <= low <= high <= 31 and assumes a 32-bit unsigned int.
+ */
+#define INTEL_MASK(high, low) ((~0u >> (31 - ((high) - (low)))) << (low))
/* Using the GNU statement expression extension */
#define SET_FIELD(value, field) \
({ \
#define GET_BITS(data, high, low) ((data & INTEL_MASK((high), (low))) >> (low))
#define GET_FIELD(word, field) (((word) & field ## _MASK) >> field ## _SHIFT)
+/**
+ * For use with masked MMIO registers where the upper 16 bits control which
+ * of the lower bits are committed to the register.
+ *
+ * Shifts \p value (the low-half bits to be enabled for writing) left by 16,
+ * yielding the corresponding upper-half mask bits.
+ *
+ * NOTE(review): \p value is shifted in its own type, so a signed int with
+ * bit 15 set would shift into the sign bit -- confirm callers stay below it.
+ */
+#define REG_MASK(value) ((value) << 16)
+
#ifndef BRW_DEFINES_H
#define BRW_DEFINES_H
#define _3DPRIM_LINESTRIP_BF 0x13
#define _3DPRIM_LINESTRIP_CONT_BF 0x14
#define _3DPRIM_TRIFAN_NOSTIPPLE 0x16
+/* Primitive type value for a patch list of n control points.
+ *
+ * n must be in the range 1..32 (asserted); the result runs from 0x20 for
+ * n == 1 up to 0x3f for n == 32.
+ *
+ * Every use of the argument is parenthesized so that an expression such as
+ * `x & 7` or `c ? a : b` is range-checked and offset as a single value
+ * rather than being re-associated with the surrounding operators.  The
+ * argument may still be evaluated more than once, so it must not have side
+ * effects.
+ */
+#define _3DPRIM_PATCHLIST(n) ({ assert((n) > 0 && (n) <= 32); 0x20 + ((n) - 1); })
+
/* We use this offset to be able to pass native primitive types in struct
* _mesa_prim::mode. Native primitive types are BRW_PRIM_OFFSET +
* Chipset Graphics Controller Programmer's Reference Manual,
* Volume 2: 3D/Media", Revision 1.0b as of January 2008,
* available at
- * http://intellinuxgraphics.org/documentation.html
+ * https://01.org/linuxgraphics/documentation/hardware-specification-prms
* at the time of this writing).
*
* These appear to be supported on at least some
#define GEN8_SURFACE_AUX_MODE_MCS 1
#define GEN8_SURFACE_AUX_MODE_APPEND 2
#define GEN8_SURFACE_AUX_MODE_HIZ 3
+#define GEN9_SURFACE_AUX_MODE_CCS_E 5
/* Surface state DW7 */
#define GEN9_SURFACE_RT_COMPRESSION_SHIFT 30
enum opcode {
/* These are the actual hardware opcodes. */
+ BRW_OPCODE_ILLEGAL = 0,
BRW_OPCODE_MOV = 1,
BRW_OPCODE_SEL = 2,
+ BRW_OPCODE_MOVI = 3, /**< G45+ */
BRW_OPCODE_NOT = 4,
BRW_OPCODE_AND = 5,
BRW_OPCODE_OR = 6,
BRW_OPCODE_XOR = 7,
BRW_OPCODE_SHR = 8,
BRW_OPCODE_SHL = 9,
+ BRW_OPCODE_DIM = 10, /**< Gen7.5 only */ /* Reused */
+ // BRW_OPCODE_SMOV = 10, /**< Gen8+ */ /* Reused */
+ /* Reserved - 11 */
BRW_OPCODE_ASR = 12,
+ /* Reserved - 13-15 */
BRW_OPCODE_CMP = 16,
BRW_OPCODE_CMPN = 17,
BRW_OPCODE_CSEL = 18, /**< Gen8+ */
BRW_OPCODE_F32TO16 = 19, /**< Gen7 only */
BRW_OPCODE_F16TO32 = 20, /**< Gen7 only */
+ /* Reserved - 21-22 */
BRW_OPCODE_BFREV = 23, /**< Gen7+ */
BRW_OPCODE_BFE = 24, /**< Gen7+ */
BRW_OPCODE_BFI1 = 25, /**< Gen7+ */
BRW_OPCODE_BFI2 = 26, /**< Gen7+ */
+ /* Reserved - 27-31 */
BRW_OPCODE_JMPI = 32,
+ // BRW_OPCODE_BRD = 33, /**< Gen7+ */
BRW_OPCODE_IF = 34,
- BRW_OPCODE_IFF = 35, /**< Pre-Gen6 */
+ BRW_OPCODE_IFF = 35, /**< Pre-Gen6 */ /* Reused */
+ // BRW_OPCODE_BRC = 35, /**< Gen7+ */ /* Reused */
BRW_OPCODE_ELSE = 36,
BRW_OPCODE_ENDIF = 37,
- BRW_OPCODE_DO = 38,
+ BRW_OPCODE_DO = 38, /**< Pre-Gen6 */ /* Reused */
+ // BRW_OPCODE_CASE = 38, /**< Gen6 only */ /* Reused */
BRW_OPCODE_WHILE = 39,
BRW_OPCODE_BREAK = 40,
BRW_OPCODE_CONTINUE = 41,
BRW_OPCODE_HALT = 42,
- BRW_OPCODE_MSAVE = 44, /**< Pre-Gen6 */
- BRW_OPCODE_MRESTORE = 45, /**< Pre-Gen6 */
- BRW_OPCODE_PUSH = 46, /**< Pre-Gen6 */
- BRW_OPCODE_GOTO = 46, /**< Gen8+ */
- BRW_OPCODE_POP = 47, /**< Pre-Gen6 */
+ // BRW_OPCODE_CALLA = 43, /**< Gen7.5+ */
+ // BRW_OPCODE_MSAVE = 44, /**< Pre-Gen6 */ /* Reused */
+ // BRW_OPCODE_CALL = 44, /**< Gen6+ */ /* Reused */
+ // BRW_OPCODE_MREST = 45, /**< Pre-Gen6 */ /* Reused */
+ // BRW_OPCODE_RET = 45, /**< Gen6+ */ /* Reused */
+ // BRW_OPCODE_PUSH = 46, /**< Pre-Gen6 */ /* Reused */
+ // BRW_OPCODE_FORK = 46, /**< Gen6 only */ /* Reused */
+ // BRW_OPCODE_GOTO = 46, /**< Gen8+ */ /* Reused */
+ // BRW_OPCODE_POP = 47, /**< Pre-Gen6 */
BRW_OPCODE_WAIT = 48,
BRW_OPCODE_SEND = 49,
BRW_OPCODE_SENDC = 50,
+ BRW_OPCODE_SENDS = 51, /**< Gen9+ */
+ BRW_OPCODE_SENDSC = 52, /**< Gen9+ */
+ /* Reserved 53-55 */
BRW_OPCODE_MATH = 56, /**< Gen6+ */
+ /* Reserved 57-63 */
BRW_OPCODE_ADD = 64,
BRW_OPCODE_MUL = 65,
BRW_OPCODE_AVG = 66,
BRW_OPCODE_SUBB = 79, /**< Gen7+ */
BRW_OPCODE_SAD2 = 80,
BRW_OPCODE_SADA2 = 81,
+ /* Reserved 82-83 */
BRW_OPCODE_DP4 = 84,
BRW_OPCODE_DPH = 85,
BRW_OPCODE_DP3 = 86,
BRW_OPCODE_DP2 = 87,
+ /* Reserved 88 */
BRW_OPCODE_LINE = 89,
BRW_OPCODE_PLN = 90, /**< G45+ */
BRW_OPCODE_MAD = 91, /**< Gen6+ */
BRW_OPCODE_LRP = 92, /**< Gen6+ */
+ // BRW_OPCODE_MADM = 93, /**< Gen8+ */
+ /* Reserved 94-124 */
BRW_OPCODE_NENOP = 125, /**< G45 only */
BRW_OPCODE_NOP = 126,
+ /* Reserved 127 */
/* These are compiler backend opcodes that get translated into other
* instructions.
/**
* Same as FS_OPCODE_FB_WRITE but expects its arguments separately as
- * individual sources instead of as a single payload blob:
- *
- * Source 0: [required] Color 0.
- * Source 1: [optional] Color 1 (for dual source blend messages).
- * Source 2: [optional] Src0 Alpha.
- * Source 3: [optional] Source Depth (passthrough from the thread payload).
- * Source 4: [optional] Destination Depth (gl_FragDepth).
- * Source 5: [optional] Sample Mask (gl_SampleMask).
- * Source 6: [required] Number of color components (as a UD immediate).
+ * individual sources instead of as a single payload blob. The
+ * position/ordering of the arguments is defined by the enum
+ * fb_write_logical_srcs.
*/
FS_OPCODE_FB_WRITE_LOGICAL,
- FS_OPCODE_BLORP_FB_WRITE,
FS_OPCODE_REP_FB_WRITE,
+
+ FS_OPCODE_FB_READ,
+ FS_OPCODE_FB_READ_LOGICAL,
+
SHADER_OPCODE_RCP,
SHADER_OPCODE_RSQ,
SHADER_OPCODE_SQRT,
*
* LOGICAL opcodes are eventually translated to the matching non-LOGICAL
* opcode but instead of taking a single payload blob they expect their
- * arguments separately as individual sources:
- *
- * Source 0: [optional] Texture coordinates.
- * Source 1: [optional] Shadow comparitor.
- * Source 2: [optional] dPdx if the operation takes explicit derivatives,
- * otherwise LOD value.
- * Source 3: [optional] dPdy if the operation takes explicit derivatives.
- * Source 4: [optional] Sample index.
- * Source 5: [optional] MCS data.
- * Source 6: [required] Texture sampler.
- * Source 7: [optional] Texel offset.
- * Source 8: [required] Number of coordinate components (as UD immediate).
- * Source 9: [required] Number derivative components (as UD immediate).
+ * arguments separately as individual sources. The position/ordering of the
+ * arguments is defined by the enum tex_logical_srcs.
*/
SHADER_OPCODE_TEX,
SHADER_OPCODE_TEX_LOGICAL,
SHADER_OPCODE_TXD_LOGICAL,
SHADER_OPCODE_TXF,
SHADER_OPCODE_TXF_LOGICAL,
+ SHADER_OPCODE_TXF_LZ,
SHADER_OPCODE_TXL,
SHADER_OPCODE_TXL_LOGICAL,
+ SHADER_OPCODE_TXL_LZ,
SHADER_OPCODE_TXS,
SHADER_OPCODE_TXS_LOGICAL,
FS_OPCODE_TXB,
FS_OPCODE_TXB_LOGICAL,
SHADER_OPCODE_TXF_CMS,
SHADER_OPCODE_TXF_CMS_LOGICAL,
+ SHADER_OPCODE_TXF_CMS_W,
+ SHADER_OPCODE_TXF_CMS_W_LOGICAL,
SHADER_OPCODE_TXF_UMS,
SHADER_OPCODE_TXF_UMS_LOGICAL,
SHADER_OPCODE_TXF_MCS,
SHADER_OPCODE_TG4_OFFSET,
SHADER_OPCODE_TG4_OFFSET_LOGICAL,
SHADER_OPCODE_SAMPLEINFO,
+ SHADER_OPCODE_SAMPLEINFO_LOGICAL,
/**
* Combines multiple sources of size 1 into a larger virtual GRF.
*/
SHADER_OPCODE_LOAD_PAYLOAD,
+ /**
+ * Packs a number of sources into a single value. Unlike LOAD_PAYLOAD, this
+ * acts intra-channel, obtaining the final value for each channel by
+ * combining the sources values for the same channel, the first source
+ * occupying the lowest bits and the last source occupying the highest
+ * bits.
+ */
+ FS_OPCODE_PACK,
+
SHADER_OPCODE_SHADER_TIME_ADD,
/**
SHADER_OPCODE_GEN4_SCRATCH_WRITE,
SHADER_OPCODE_GEN7_SCRATCH_READ,
+ /**
+ * Gen8+ SIMD8 URB Read messages.
+ */
+ SHADER_OPCODE_URB_READ_SIMD8,
+ SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT,
+
SHADER_OPCODE_URB_WRITE_SIMD8,
+ SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT,
+ SHADER_OPCODE_URB_WRITE_SIMD8_MASKED,
+ SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT,
/**
* Return the index of an arbitrary live channel (i.e. one of the channels
/**
* Pick the channel from its first source register given by the index
* specified as second source. Useful for variable indexing of surfaces.
+ *
+ * Note that because the result of this instruction is by definition
+ * uniform and it can always be splatted to multiple channels using a
+ * scalar regioning mode, only the first channel of the destination region
+ * is guaranteed to be updated, which implies that BROADCAST instructions
+ * should usually be marked force_writemask_all.
*/
SHADER_OPCODE_BROADCAST,
FS_OPCODE_DDX_FINE,
/**
* Compute dFdy(), dFdyCoarse(), or dFdyFine().
- * src1 is an immediate storing the key->render_to_fbo boolean.
*/
FS_OPCODE_DDY_COARSE,
FS_OPCODE_DDY_FINE,
FS_OPCODE_PIXEL_Y,
FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7,
- FS_OPCODE_VARYING_PULL_CONSTANT_LOAD,
+ FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN4,
FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7,
+ FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL,
+ FS_OPCODE_GET_BUFFER_SIZE,
FS_OPCODE_MOV_DISPATCH_TO_FLAGS,
FS_OPCODE_DISCARD_JUMP,
FS_OPCODE_SET_SAMPLE_ID,
FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X,
FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y,
FS_OPCODE_PLACEHOLDER_HALT,
- FS_OPCODE_INTERPOLATE_AT_CENTROID,
FS_OPCODE_INTERPOLATE_AT_SAMPLE,
FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET,
FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET,
* Calculate the high 32-bits of a 32x32 multiply.
*/
SHADER_OPCODE_MULH,
+
+ /**
+ * A MOV that uses VxH indirect addressing.
+ *
+ * Source 0: A register to start from (HW_REG).
+ * Source 1: An indirect offset (in bytes, UD GRF).
+ * Source 2: The length of the region that could be accessed (in bytes,
+ * UD immediate).
+ */
+ SHADER_OPCODE_MOV_INDIRECT,
+
+ VEC4_OPCODE_URB_READ,
+ TCS_OPCODE_GET_INSTANCE_ID,
+ TCS_OPCODE_URB_WRITE,
+ TCS_OPCODE_SET_INPUT_URB_OFFSETS,
+ TCS_OPCODE_SET_OUTPUT_URB_OFFSETS,
+ TCS_OPCODE_GET_PRIMITIVE_ID,
+ TCS_OPCODE_CREATE_BARRIER_HEADER,
+ TCS_OPCODE_SRC0_010_IS_ZERO,
+ TCS_OPCODE_RELEASE_INPUT,
+ TCS_OPCODE_THREAD_END,
+
+ TES_OPCODE_GET_PRIMITIVE_ID,
+ TES_OPCODE_CREATE_INPUT_READ_HEADER,
+ TES_OPCODE_ADD_INDIRECT_URB_OFFSET,
};
enum brw_urb_write_flags {
BRW_URB_WRITE_ALLOCATE | BRW_URB_WRITE_COMPLETE,
};
+/**
+ * Source positions for FS_OPCODE_FB_WRITE_LOGICAL.
+ *
+ * Each enumerator is the index of one logical argument of the opcode.  The
+ * enumerators are implicitly numbered from 0, so the trailing
+ * FB_WRITE_LOGICAL_NUM_SRCS equals the total number of sources.
+ */
+enum fb_write_logical_srcs {
+ FB_WRITE_LOGICAL_SRC_COLOR0, /* REQUIRED */
+ FB_WRITE_LOGICAL_SRC_COLOR1, /* for dual source blend messages */
+ FB_WRITE_LOGICAL_SRC_SRC0_ALPHA,
+ FB_WRITE_LOGICAL_SRC_SRC_DEPTH, /* gl_FragDepth */
+ FB_WRITE_LOGICAL_SRC_DST_DEPTH, /* GEN4-5: passthrough from thread */
+ FB_WRITE_LOGICAL_SRC_SRC_STENCIL, /* gl_FragStencilRefARB */
+ FB_WRITE_LOGICAL_SRC_OMASK, /* Sample Mask (gl_SampleMask) */
+ FB_WRITE_LOGICAL_SRC_COMPONENTS, /* REQUIRED */
+ FB_WRITE_LOGICAL_NUM_SRCS
+};
+
+/**
+ * Source positions for the *_LOGICAL texturing opcodes (for example
+ * SHADER_OPCODE_TEX_LOGICAL), which take their arguments as individual
+ * sources instead of a single payload blob.
+ *
+ * Each enumerator is the index of one logical argument.  The enumerators
+ * are implicitly numbered from 0, so the trailing TEX_LOGICAL_NUM_SRCS
+ * equals the total number of sources.
+ */
+enum tex_logical_srcs {
+ /** Texture coordinates */
+ TEX_LOGICAL_SRC_COORDINATE,
+ /** Shadow comparator */
+ TEX_LOGICAL_SRC_SHADOW_C,
+ /** dPdx if the operation takes explicit derivatives, otherwise LOD value */
+ TEX_LOGICAL_SRC_LOD,
+ /** dPdy if the operation takes explicit derivatives */
+ TEX_LOGICAL_SRC_LOD2,
+ /** Sample index */
+ TEX_LOGICAL_SRC_SAMPLE_INDEX,
+ /** MCS data */
+ TEX_LOGICAL_SRC_MCS,
+ /** REQUIRED: Texture surface index */
+ TEX_LOGICAL_SRC_SURFACE,
+ /** Texture sampler index */
+ TEX_LOGICAL_SRC_SAMPLER,
+ /** Texel offset for gathers */
+ TEX_LOGICAL_SRC_OFFSET_VALUE,
+ /** REQUIRED: Number of coordinate components (as UD immediate) */
+ TEX_LOGICAL_SRC_COORD_COMPONENTS,
+ /** REQUIRED: Number of derivative components (as UD immediate) */
+ TEX_LOGICAL_SRC_GRAD_COMPONENTS,
+
+ TEX_LOGICAL_NUM_SRCS,
+};
+
#ifdef __cplusplus
/**
* Allow brw_urb_write_flags enums to be ORed together.
BRW_PREDICATE_ALIGN16_ALL4H = 7,
};
-#define BRW_ARCHITECTURE_REGISTER_FILE 0
-#define BRW_GENERAL_REGISTER_FILE 1
-#define BRW_MESSAGE_REGISTER_FILE 2
-#define BRW_IMMEDIATE_VALUE 3
+/**
+ * Register file identifiers.
+ *
+ * The four BRW_* enumerators carry explicit values 0-3, and ARF, FIXED_GRF,
+ * MRF and IMM are short aliases for them.  The remaining enumerators are
+ * not hardware values; they are implicitly numbered onwards from IMM
+ * (VGRF = 4, ATTR = 5, UNIFORM = 6, BAD_FILE = 7).
+ *
+ * NOTE(review): PACKED is defined elsewhere; presumably it asks the
+ * compiler for the smallest storage type -- confirm against its definition.
+ */
+enum PACKED brw_reg_file {
+ BRW_ARCHITECTURE_REGISTER_FILE = 0,
+ BRW_GENERAL_REGISTER_FILE = 1,
+ BRW_MESSAGE_REGISTER_FILE = 2,
+ BRW_IMMEDIATE_VALUE = 3,
+
+ /* Short aliases for the hardware values above */
+ ARF = BRW_ARCHITECTURE_REGISTER_FILE,
+ FIXED_GRF = BRW_GENERAL_REGISTER_FILE,
+ MRF = BRW_MESSAGE_REGISTER_FILE,
+ IMM = BRW_IMMEDIATE_VALUE,
+
+ /* These are not hardware values */
+ VGRF,
+ ATTR,
+ UNIFORM, /* prog_data->params[reg] */
+ BAD_FILE,
+};
#define BRW_HW_REG_TYPE_UD 0
#define BRW_HW_REG_TYPE_D 1
#define GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO 17
#define GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_C 18
#define HSW_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE 20
+#define GEN9_SAMPLER_MESSAGE_SAMPLE_LZ 24
+#define GEN9_SAMPLER_MESSAGE_SAMPLE_C_LZ 25
+#define GEN9_SAMPLER_MESSAGE_SAMPLE_LD_LZ 26
+#define GEN9_SAMPLER_MESSAGE_SAMPLE_LD2DMS_W 28
#define GEN7_SAMPLER_MESSAGE_SAMPLE_LD_MCS 29
#define GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DMS 30
#define GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DSS 31
#define HSW_DATAPORT_DC_PORT1_ATOMIC_COUNTER_OP_SIMD4X2 12
#define HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_WRITE 13
+/* GEN9 */
+#define GEN9_DATAPORT_RC_RENDER_TARGET_WRITE 12
+#define GEN9_DATAPORT_RC_RENDER_TARGET_READ 13
+
+/* Dataport special binding table indices: */
+#define BRW_BTI_STATELESS 255
+#define GEN7_BTI_SLM 254
+/* Note that on Gen8+ BTI 255 was redefined to be IA-coherent according to the
+ * hardware spec, however because the DRM sets bit 4 of HDC_CHICKEN0 on BDW,
+ * CHV and at least some pre-production steppings of SKL due to
+ * WaForceEnableNonCoherent, HDC memory access may have been overridden by the
+ * kernel to be non-coherent (matching the behavior of the same BTI on
+ * pre-Gen8 hardware) and BTI 255 may actually be an alias for BTI 253.
+ */
+#define GEN8_BTI_STATELESS_IA_COHERENT 255
+#define GEN8_BTI_STATELESS_NON_COHERENT 253
+
/* dataport atomic operations. */
#define BRW_AOP_AND 1
#define BRW_AOP_OR 2
/* Gen7 "GS URB Entry Allocation Size" is a U9-1 field, so the maximum gs_size
* is 2^9, or 512. It's counted in multiples of 64 bytes.
+ *
+ * Identical for VS, DS, and HS.
*/
#define GEN7_MAX_GS_URB_ENTRY_SIZE_BYTES (512*64)
+#define GEN7_MAX_DS_URB_ENTRY_SIZE_BYTES (512*64)
+#define GEN7_MAX_HS_URB_ENTRY_SIZE_BYTES (512*64)
+#define GEN7_MAX_VS_URB_ENTRY_SIZE_BYTES (512*64)
+
/* Gen6 "GS URB Entry Allocation Size" is defined as a number of 1024-bit
* (128 bytes) URB rows and the maximum allowed value is 5 rows.
*/
# define GEN6_GS_SVBI_POSTINCREMENT_VALUE_MASK INTEL_MASK(25, 16)
# define GEN6_GS_ENABLE (1 << 15)
+/* Gen8+ DW8 */
+# define GEN8_GS_STATIC_OUTPUT (1 << 30)
+# define GEN8_GS_STATIC_VERTEX_COUNT_SHIFT 16
+# define GEN8_GS_STATIC_VERTEX_COUNT_MASK INTEL_MASK(26, 16)
+
/* Gen8+ DW9 */
# define GEN8_GS_URB_ENTRY_OUTPUT_OFFSET_SHIFT 21
# define GEN8_GS_URB_OUTPUT_LENGTH_SHIFT 16
# define GEN8_PSX_ATTRIBUTE_ENABLE (1 << 8)
# define GEN8_PSX_SHADER_DISABLES_ALPHA_TO_COVERAGE (1 << 7)
# define GEN8_PSX_SHADER_IS_PER_SAMPLE (1 << 6)
-# define GEN8_PSX_SHADER_COMPUTES_STENCIL (1 << 5)
+# define GEN9_PSX_SHADER_COMPUTES_STENCIL (1 << 5)
# define GEN9_PSX_SHADER_PULLS_BARY (1 << 3)
# define GEN8_PSX_SHADER_HAS_UAV (1 << 2)
# define GEN8_PSX_SHADER_USES_INPUT_COVERAGE_MASK (1 << 1)
# define GEN9_PSX_SHADER_NORMAL_COVERAGE_MASK_SHIFT 0
-enum brw_wm_barycentric_interp_mode {
- BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC = 0,
- BRW_WM_PERSPECTIVE_CENTROID_BARYCENTRIC = 1,
- BRW_WM_PERSPECTIVE_SAMPLE_BARYCENTRIC = 2,
- BRW_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC = 3,
- BRW_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC = 4,
- BRW_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC = 5,
- BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT = 6
+enum brw_barycentric_mode {
+ BRW_BARYCENTRIC_PERSPECTIVE_PIXEL = 0,
+ BRW_BARYCENTRIC_PERSPECTIVE_CENTROID = 1,
+ BRW_BARYCENTRIC_PERSPECTIVE_SAMPLE = 2,
+ BRW_BARYCENTRIC_NONPERSPECTIVE_PIXEL = 3,
+ BRW_BARYCENTRIC_NONPERSPECTIVE_CENTROID = 4,
+ BRW_BARYCENTRIC_NONPERSPECTIVE_SAMPLE = 5,
+ BRW_BARYCENTRIC_MODE_COUNT = 6
};
-#define BRW_WM_NONPERSPECTIVE_BARYCENTRIC_BITS \
- ((1 << BRW_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC) | \
- (1 << BRW_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC) | \
- (1 << BRW_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC))
+#define BRW_BARYCENTRIC_NONPERSPECTIVE_BITS \
+ ((1 << BRW_BARYCENTRIC_NONPERSPECTIVE_PIXEL) | \
+ (1 << BRW_BARYCENTRIC_NONPERSPECTIVE_CENTROID) | \
+ (1 << BRW_BARYCENTRIC_NONPERSPECTIVE_SAMPLE))
#define _3DSTATE_WM 0x7814 /* GEN6+ */
/* DW1: kernel pointer */
#define _3DSTATE_CONSTANT_HS 0x7819 /* GEN7+ */
#define _3DSTATE_CONSTANT_DS 0x781A /* GEN7+ */
+/* Resource streamer gather constants */
+#define _3DSTATE_GATHER_POOL_ALLOC 0x791A /* GEN7.5+ */
+#define HSW_GATHER_POOL_ALLOC_MUST_BE_ONE (3 << 4) /* GEN7.5 only */
+
+#define _3DSTATE_GATHER_CONSTANT_VS 0x7834 /* GEN7.5+ */
+#define _3DSTATE_GATHER_CONSTANT_GS 0x7835
+#define _3DSTATE_GATHER_CONSTANT_HS 0x7836
+#define _3DSTATE_GATHER_CONSTANT_DS 0x7837
+#define _3DSTATE_GATHER_CONSTANT_PS 0x7838
+#define HSW_GATHER_CONSTANT_ENABLE (1 << 11)
+#define HSW_GATHER_CONSTANT_BUFFER_VALID_SHIFT 16
+#define HSW_GATHER_CONSTANT_BUFFER_VALID_MASK INTEL_MASK(31, 16)
+#define HSW_GATHER_CONSTANT_BINDING_TABLE_BLOCK_SHIFT 12
+#define HSW_GATHER_CONSTANT_BINDING_TABLE_BLOCK_MASK INTEL_MASK(15, 12)
+#define HSW_GATHER_CONSTANT_CONST_BUFFER_OFFSET_SHIFT 8
+#define HSW_GATHER_CONSTANT_CONST_BUFFER_OFFSET_MASK INTEL_MASK(15, 8)
+#define HSW_GATHER_CONSTANT_CHANNEL_MASK_SHIFT 4
+#define HSW_GATHER_CONSTANT_CHANNEL_MASK_MASK INTEL_MASK(7, 4)
+
#define _3DSTATE_STREAMOUT 0x781e /* GEN7+ */
/* DW1 */
# define SO_FUNCTION_ENABLE (1 << 31)
# define GEN7_PS_RENDER_TARGET_FAST_CLEAR_ENABLE (1 << 8)
# define GEN7_PS_DUAL_SOURCE_BLEND_ENABLE (1 << 7)
# define GEN7_PS_RENDER_TARGET_RESOLVE_ENABLE (1 << 6)
+# define GEN9_PS_RENDER_TARGET_RESOLVE_FULL (3 << 6)
# define HSW_PS_UAV_ACCESS_ENABLE (1 << 5)
# define GEN7_PS_POSOFFSET_NONE (0 << 3)
# define GEN7_PS_POSOFFSET_CENTROID (2 << 3)
/* GEN7 DW5, GEN8+ DW6 */
# define MEDIA_BARRIER_ENABLE_SHIFT 21
# define MEDIA_BARRIER_ENABLE_MASK INTEL_MASK(21, 21)
+# define MEDIA_SHARED_LOCAL_MEMORY_SIZE_SHIFT 16
+# define MEDIA_SHARED_LOCAL_MEMORY_SIZE_MASK INTEL_MASK(20, 16)
# define MEDIA_GPGPU_THREAD_COUNT_SHIFT 0
# define MEDIA_GPGPU_THREAD_COUNT_MASK INTEL_MASK(7, 0)
# define GEN8_MEDIA_GPGPU_THREAD_COUNT_SHIFT 0
# define GEN8_MEDIA_GPGPU_THREAD_COUNT_MASK INTEL_MASK(9, 0)
+/* GEN7 DW6, GEN8+ DW7 */
+# define CROSS_THREAD_READ_LENGTH_SHIFT 0
+# define CROSS_THREAD_READ_LENGTH_MASK INTEL_MASK(7, 0)
#define MEDIA_STATE_FLUSH 0x7004
#define GPGPU_WALKER 0x7105
/* GEN7 DW0 */
# define GEN7_GPGPU_INDIRECT_PARAMETER_ENABLE (1 << 10)
+# define GEN7_GPGPU_PREDICATE_ENABLE (1 << 8)
/* GEN8+ DW2 */
# define GPGPU_WALKER_INDIRECT_LENGTH_SHIFT 0
# define GPGPU_WALKER_INDIRECT_LENGTH_MASK INTEL_MASK(15, 0)
# define GPGPU_WALKER_THREAD_WIDTH_MAX_SHIFT 0
# define GPGPU_WALKER_THREAD_WIDTH_MAX_MASK INTEL_MASK(5, 0)
+/* Command-type selectors, placed at bit 29 of a command header value. */
+#define CMD_MI (0x0 << 29)
+#define CMD_2D (0x2 << 29)
+#define CMD_3D (0x3 << 29)
+
+#define MI_NOOP (CMD_MI | 0)
+
+/* The opcode shift is parenthesized explicitly, matching the other MI_*
+ * command definitions, instead of relying on << binding tighter than |.
+ */
+#define MI_BATCH_BUFFER_END (CMD_MI | (0xA << 23))
+
+#define MI_FLUSH (CMD_MI | (4 << 23))
+#define FLUSH_MAP_CACHE (1 << 0)
+#define INHIBIT_FLUSH_RENDER_CACHE (1 << 2)
+
+#define MI_STORE_DATA_IMM (CMD_MI | (0x20 << 23))
+#define MI_LOAD_REGISTER_IMM (CMD_MI | (0x22 << 23))
+#define MI_LOAD_REGISTER_REG (CMD_MI | (0x2A << 23))
+
+#define MI_FLUSH_DW (CMD_MI | (0x26 << 23) | 2)
+
+#define MI_STORE_REGISTER_MEM (CMD_MI | (0x24 << 23))
+# define MI_STORE_REGISTER_MEM_USE_GGTT (1 << 22)
+# define MI_STORE_REGISTER_MEM_PREDICATE (1 << 21)
+
+/* Load a value from memory into a register. Only available on Gen7+. */
+#define GEN7_MI_LOAD_REGISTER_MEM (CMD_MI | (0x29 << 23))
+# define MI_LOAD_REGISTER_MEM_USE_GGTT (1 << 22)
+/* Haswell RS control */
+#define MI_RS_CONTROL (CMD_MI | (0x6 << 23))
+#define MI_RS_STORE_DATA_IMM (CMD_MI | (0x2b << 23))
+
+/* Manipulate the predicate bit based on some register values. Only on Gen7+ */
+#define GEN7_MI_PREDICATE (CMD_MI | (0xC << 23))
+# define MI_PREDICATE_LOADOP_KEEP (0 << 6)
+# define MI_PREDICATE_LOADOP_LOAD (2 << 6)
+# define MI_PREDICATE_LOADOP_LOADINV (3 << 6)
+# define MI_PREDICATE_COMBINEOP_SET (0 << 3)
+# define MI_PREDICATE_COMBINEOP_AND (1 << 3)
+# define MI_PREDICATE_COMBINEOP_OR (2 << 3)
+# define MI_PREDICATE_COMBINEOP_XOR (3 << 3)
+# define MI_PREDICATE_COMPAREOP_TRUE (0 << 0)
+# define MI_PREDICATE_COMPAREOP_FALSE (1 << 0)
+# define MI_PREDICATE_COMPAREOP_SRCS_EQUAL (2 << 0)
+# define MI_PREDICATE_COMPAREOP_DELTAS_EQUAL (3 << 0)
+
+#define HSW_MI_MATH (CMD_MI | (0x1a << 23))
+
+/* Pack an ALU opcode and up to two operands into a single value.
+ *
+ * The arguments are bare suffixes (e.g. ADD, SRCA, R0) that are
+ * token-pasted onto MI_MATH_OPCODE_ and MI_MATH_OPERAND_ respectively.
+ * The opcode is placed at bit 20, the first operand at bit 10 and the
+ * second operand at bit 0.
+ *
+ * NOTE(review): presumably each value is one instruction dword following
+ * an HSW_MI_MATH header -- confirm against the hardware documentation.
+ */
+#define MI_MATH_ALU2(opcode, operand1, operand2) \
+ ( ((MI_MATH_OPCODE_##opcode) << 20) | ((MI_MATH_OPERAND_##operand1) << 10) | \
+ ((MI_MATH_OPERAND_##operand2) << 0) )
+
+/* As MI_MATH_ALU2, but with only the first operand field filled in. */
+#define MI_MATH_ALU1(opcode, operand1) \
+ ( ((MI_MATH_OPCODE_##opcode) << 20) | ((MI_MATH_OPERAND_##operand1) << 10) )
+
+/* As MI_MATH_ALU2, but with the opcode field only. */
+#define MI_MATH_ALU0(opcode) \
+ ( ((MI_MATH_OPCODE_##opcode) << 20) )
+
+#define MI_MATH_OPCODE_NOOP 0x000
+#define MI_MATH_OPCODE_LOAD 0x080
+#define MI_MATH_OPCODE_LOADINV 0x480
+#define MI_MATH_OPCODE_LOAD0 0x081
+#define MI_MATH_OPCODE_LOAD1 0x481
+#define MI_MATH_OPCODE_ADD 0x100
+#define MI_MATH_OPCODE_SUB 0x101
+#define MI_MATH_OPCODE_AND 0x102
+#define MI_MATH_OPCODE_OR 0x103
+#define MI_MATH_OPCODE_XOR 0x104
+#define MI_MATH_OPCODE_STORE 0x180
+#define MI_MATH_OPCODE_STOREINV 0x580
+
+#define MI_MATH_OPERAND_R0 0x00
+#define MI_MATH_OPERAND_R1 0x01
+#define MI_MATH_OPERAND_R2 0x02
+#define MI_MATH_OPERAND_R3 0x03
+#define MI_MATH_OPERAND_R4 0x04
+#define MI_MATH_OPERAND_SRCA 0x20
+#define MI_MATH_OPERAND_SRCB 0x21
+#define MI_MATH_OPERAND_ACCU 0x31
+#define MI_MATH_OPERAND_ZF 0x32
+#define MI_MATH_OPERAND_CF 0x33
+
+/** @{
+ *
+ * PIPE_CONTROL operation, a combination of MI_FLUSH and register write with
+ * additional flushing control.
+ */
+#define _3DSTATE_PIPE_CONTROL (CMD_3D | (3 << 27) | (2 << 24))
+#define PIPE_CONTROL_CS_STALL (1 << 20)
+#define PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET (1 << 19)
+#define PIPE_CONTROL_TLB_INVALIDATE (1 << 18)
+#define PIPE_CONTROL_SYNC_GFDT (1 << 17)
+#define PIPE_CONTROL_MEDIA_STATE_CLEAR (1 << 16)
+#define PIPE_CONTROL_NO_WRITE (0 << 14)
+#define PIPE_CONTROL_WRITE_IMMEDIATE (1 << 14)
+#define PIPE_CONTROL_WRITE_DEPTH_COUNT (2 << 14)
+#define PIPE_CONTROL_WRITE_TIMESTAMP (3 << 14)
+#define PIPE_CONTROL_DEPTH_STALL (1 << 13)
+#define PIPE_CONTROL_RENDER_TARGET_FLUSH (1 << 12)
+#define PIPE_CONTROL_INSTRUCTION_INVALIDATE (1 << 11)
+#define PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE (1 << 10) /* GM45+ only */
+#define PIPE_CONTROL_ISP_DIS (1 << 9)
+#define PIPE_CONTROL_INTERRUPT_ENABLE (1 << 8)
+#define PIPE_CONTROL_FLUSH_ENABLE (1 << 7) /* Gen7+ only */
+/* GT */
+#define PIPE_CONTROL_DATA_CACHE_FLUSH (1 << 5)
+#define PIPE_CONTROL_VF_CACHE_INVALIDATE (1 << 4)
+#define PIPE_CONTROL_CONST_CACHE_INVALIDATE (1 << 3)
+#define PIPE_CONTROL_STATE_CACHE_INVALIDATE (1 << 2)
+#define PIPE_CONTROL_STALL_AT_SCOREBOARD (1 << 1)
+#define PIPE_CONTROL_DEPTH_CACHE_FLUSH (1 << 0)
+#define PIPE_CONTROL_PPGTT_WRITE (0 << 2)
+#define PIPE_CONTROL_GLOBAL_GTT_WRITE (1 << 2)
+
+#define PIPE_CONTROL_CACHE_FLUSH_BITS \
+ (PIPE_CONTROL_DEPTH_CACHE_FLUSH | PIPE_CONTROL_DATA_CACHE_FLUSH | \
+ PIPE_CONTROL_RENDER_TARGET_FLUSH)
+
+#define PIPE_CONTROL_CACHE_INVALIDATE_BITS \
+ (PIPE_CONTROL_STATE_CACHE_INVALIDATE | PIPE_CONTROL_CONST_CACHE_INVALIDATE | \
+ PIPE_CONTROL_VF_CACHE_INVALIDATE | PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | \
+ PIPE_CONTROL_INSTRUCTION_INVALIDATE)
+
+/** @} */
+
+#define XY_SETUP_BLT_CMD (CMD_2D | (0x01 << 22))
+
+#define XY_COLOR_BLT_CMD (CMD_2D | (0x50 << 22))
+
+#define XY_SRC_COPY_BLT_CMD (CMD_2D | (0x53 << 22))
+
+#define XY_FAST_COPY_BLT_CMD (CMD_2D | (0x42 << 22))
+
+#define XY_TEXT_IMMEDIATE_BLIT_CMD (CMD_2D | (0x31 << 22))
+# define XY_TEXT_BYTE_PACKED (1 << 16)
+
+/* BR00 */
+#define XY_BLT_WRITE_ALPHA (1 << 21)
+#define XY_BLT_WRITE_RGB (1 << 20)
+#define XY_SRC_TILED (1 << 15)
+#define XY_DST_TILED (1 << 11)
+
+/* BR00 */
+#define XY_FAST_SRC_TILED_64K (3 << 20)
+#define XY_FAST_SRC_TILED_Y (2 << 20)
+#define XY_FAST_SRC_TILED_X (1 << 20)
+
+#define XY_FAST_DST_TILED_64K (3 << 13)
+#define XY_FAST_DST_TILED_Y (2 << 13)
+#define XY_FAST_DST_TILED_X (1 << 13)
+
+/* BR13 */
+#define BR13_8 (0x0 << 24)
+#define BR13_565 (0x1 << 24)
+#define BR13_8888 (0x3 << 24)
+#define BR13_16161616 (0x4 << 24)
+#define BR13_32323232 (0x5 << 24)
+
+/* Bit 31 uses an unsigned constant: 1 << 31 does not fit in a signed int,
+ * so the signed shift would be undefined behaviour and yield a negative
+ * value on common compilers.
+ */
+#define XY_FAST_SRC_TRMODE_YF (1u << 31)
+#define XY_FAST_DST_TRMODE_YF (1 << 30)
+
+/* Pipeline Statistics Counter Registers */
+#define IA_VERTICES_COUNT 0x2310
+#define IA_PRIMITIVES_COUNT 0x2318
+#define VS_INVOCATION_COUNT 0x2320
+#define HS_INVOCATION_COUNT 0x2300
+#define DS_INVOCATION_COUNT 0x2308
+#define GS_INVOCATION_COUNT 0x2328
+#define GS_PRIMITIVES_COUNT 0x2330
+#define CL_INVOCATION_COUNT 0x2338
+#define CL_PRIMITIVES_COUNT 0x2340
+#define PS_INVOCATION_COUNT 0x2348
+#define CS_INVOCATION_COUNT 0x2290
+#define PS_DEPTH_COUNT 0x2350
+
+#define GEN6_SO_PRIM_STORAGE_NEEDED 0x2280
+#define GEN7_SO_PRIM_STORAGE_NEEDED(n) (0x5240 + (n) * 8)
+
+#define GEN6_SO_NUM_PRIMS_WRITTEN 0x2288
+#define GEN7_SO_NUM_PRIMS_WRITTEN(n) (0x5200 + (n) * 8)
+
+#define GEN7_SO_WRITE_OFFSET(n) (0x5280 + (n) * 4)
+
+#define TIMESTAMP 0x2358
+
+#define BCS_SWCTRL 0x22200
+# define BCS_SWCTRL_SRC_Y (1 << 0)
+# define BCS_SWCTRL_DST_Y (1 << 1)
+
+#define OACONTROL 0x2360
+# define OACONTROL_COUNTER_SELECT_SHIFT 2
+# define OACONTROL_ENABLE_COUNTERS (1 << 0)
+
+/* Auto-Draw / Indirect Registers */
+#define GEN7_3DPRIM_END_OFFSET 0x2420
+#define GEN7_3DPRIM_START_VERTEX 0x2430
+#define GEN7_3DPRIM_VERTEX_COUNT 0x2434
+#define GEN7_3DPRIM_INSTANCE_COUNT 0x2438
+#define GEN7_3DPRIM_START_INSTANCE 0x243C
+#define GEN7_3DPRIM_BASE_VERTEX 0x2440
+
+/* Auto-Compute / Indirect Registers */
+#define GEN7_GPGPU_DISPATCHDIMX 0x2500
+#define GEN7_GPGPU_DISPATCHDIMY 0x2504
+#define GEN7_GPGPU_DISPATCHDIMZ 0x2508
+
+#define GEN7_CACHE_MODE_1 0x7004
+# define GEN8_HIZ_NP_PMA_FIX_ENABLE (1 << 11)
+# define GEN8_HIZ_NP_EARLY_Z_FAILS_DISABLE (1 << 13)
+# define GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC (1 << 1)
+# define GEN8_HIZ_PMA_MASK_BITS \
+ REG_MASK(GEN8_HIZ_NP_PMA_FIX_ENABLE | GEN8_HIZ_NP_EARLY_Z_FAILS_DISABLE)
+
+/* Predicate registers */
+#define MI_PREDICATE_SRC0 0x2400
+#define MI_PREDICATE_SRC1 0x2408
+#define MI_PREDICATE_DATA 0x2410
+#define MI_PREDICATE_RESULT 0x2418
+#define MI_PREDICATE_RESULT_1 0x241C
+#define MI_PREDICATE_RESULT_2 0x2214
+
+#define HSW_CS_GPR(n) (0x2600 + (n) * 8)
+
+/* L3 cache control registers. */
+#define GEN7_L3SQCREG1 0xb010
+/* L3SQ general and high priority credit initialization. */
+# define IVB_L3SQCREG1_SQGHPCI_DEFAULT 0x00730000
+# define VLV_L3SQCREG1_SQGHPCI_DEFAULT 0x00d30000
+# define HSW_L3SQCREG1_SQGHPCI_DEFAULT 0x00610000
+# define GEN7_L3SQCREG1_CONV_DC_UC (1 << 24)
+# define GEN7_L3SQCREG1_CONV_IS_UC (1 << 25)
+# define GEN7_L3SQCREG1_CONV_C_UC (1 << 26)
+# define GEN7_L3SQCREG1_CONV_T_UC (1 << 27)
+
+#define GEN7_L3CNTLREG2 0xb020
+# define GEN7_L3CNTLREG2_SLM_ENABLE (1 << 0)
+# define GEN7_L3CNTLREG2_URB_ALLOC_SHIFT 1
+# define GEN7_L3CNTLREG2_URB_ALLOC_MASK INTEL_MASK(6, 1)
+# define GEN7_L3CNTLREG2_URB_LOW_BW (1 << 7)
+# define GEN7_L3CNTLREG2_ALL_ALLOC_SHIFT 8
+# define GEN7_L3CNTLREG2_ALL_ALLOC_MASK INTEL_MASK(13, 8)
+# define GEN7_L3CNTLREG2_RO_ALLOC_SHIFT 14
+# define GEN7_L3CNTLREG2_RO_ALLOC_MASK INTEL_MASK(19, 14)
+# define GEN7_L3CNTLREG2_RO_LOW_BW (1 << 20)
+# define GEN7_L3CNTLREG2_DC_ALLOC_SHIFT 21
+# define GEN7_L3CNTLREG2_DC_ALLOC_MASK INTEL_MASK(26, 21)
+# define GEN7_L3CNTLREG2_DC_LOW_BW (1 << 27)
+
+#define GEN7_L3CNTLREG3 0xb024
+# define GEN7_L3CNTLREG3_IS_ALLOC_SHIFT 1
+# define GEN7_L3CNTLREG3_IS_ALLOC_MASK INTEL_MASK(6, 1)
+# define GEN7_L3CNTLREG3_IS_LOW_BW (1 << 7)
+# define GEN7_L3CNTLREG3_C_ALLOC_SHIFT 8
+# define GEN7_L3CNTLREG3_C_ALLOC_MASK INTEL_MASK(13, 8)
+# define GEN7_L3CNTLREG3_C_LOW_BW (1 << 14)
+# define GEN7_L3CNTLREG3_T_ALLOC_SHIFT 15
+# define GEN7_L3CNTLREG3_T_ALLOC_MASK INTEL_MASK(20, 15)
+# define GEN7_L3CNTLREG3_T_LOW_BW (1 << 21)
+
+#define HSW_SCRATCH1 0xb038
+#define HSW_SCRATCH1_L3_ATOMIC_DISABLE (1 << 27)
+
+#define HSW_ROW_CHICKEN3 0xe49c
+#define HSW_ROW_CHICKEN3_L3_ATOMIC_DISABLE (1 << 6)
+
+#define GEN8_L3CNTLREG 0x7034
+# define GEN8_L3CNTLREG_SLM_ENABLE (1 << 0)
+# define GEN8_L3CNTLREG_URB_ALLOC_SHIFT 1
+# define GEN8_L3CNTLREG_URB_ALLOC_MASK INTEL_MASK(7, 1)
+# define GEN8_L3CNTLREG_RO_ALLOC_SHIFT 11
+# define GEN8_L3CNTLREG_RO_ALLOC_MASK INTEL_MASK(17, 11)
+# define GEN8_L3CNTLREG_DC_ALLOC_SHIFT 18
+# define GEN8_L3CNTLREG_DC_ALLOC_MASK INTEL_MASK(24, 18)
+# define GEN8_L3CNTLREG_ALL_ALLOC_SHIFT 25
+# define GEN8_L3CNTLREG_ALL_ALLOC_MASK INTEL_MASK(31, 25)
+
#endif