fieldval & field ## _MASK; \
})
+#define GET_BITS(data, high, low) ((data & INTEL_MASK((high), (low))) >> (low))
#define GET_FIELD(word, field) (((word) & field ## _MASK) >> field ## _SHIFT)
#ifndef BRW_DEFINES_H
# define GEN4_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL (0 << 15)
# define GEN4_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM (1 << 15)
# define GEN7_3DPRIM_INDIRECT_PARAMETER_ENABLE (1 << 10)
+# define GEN7_3DPRIM_PREDICATE_ENABLE (1 << 8)
/* DW1 */
# define GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL (0 << 8)
# define GEN7_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM (1 << 8)
#define GEN7_SURFACE_ARYSPC_FULL (0 << 10)
#define GEN7_SURFACE_ARYSPC_LOD0 (1 << 10)
-/* Surface state DW0 */
+/* Surface state DW1 */
#define GEN8_SURFACE_MOCS_SHIFT 24
#define GEN8_SURFACE_MOCS_MASK INTEL_MASK(30, 24)
+#define GEN8_SURFACE_QPITCH_SHIFT 0
+#define GEN8_SURFACE_QPITCH_MASK INTEL_MASK(14, 0)
/* Surface state DW2 */
#define BRW_SURFACE_HEIGHT_SHIFT 19
#define GEN7_SURFACE_MOCS_SHIFT 16
#define GEN7_SURFACE_MOCS_MASK INTEL_MASK(19, 16)
+#define GEN9_SURFACE_TRMODE_SHIFT 18
+#define GEN9_SURFACE_TRMODE_MASK INTEL_MASK(19, 18)
+#define GEN9_SURFACE_TRMODE_NONE 0
+#define GEN9_SURFACE_TRMODE_TILEYF 1
+#define GEN9_SURFACE_TRMODE_TILEYS 2
+
+#define GEN9_SURFACE_MIP_TAIL_START_LOD_SHIFT 8
+#define GEN9_SURFACE_MIP_TAIL_START_LOD_MASK INTEL_MASK(11, 8)
+
/* Surface state DW6 */
#define GEN7_SURFACE_MCS_ENABLE (1 << 0)
#define GEN7_SURFACE_MCS_PITCH_SHIFT 3
#define GEN8_SURFACE_AUX_MODE_HIZ 3
/* Surface state DW7 */
+#define GEN9_SURFACE_RT_COMPRESSION_SHIFT 30
+#define GEN9_SURFACE_RT_COMPRESSION_MASK INTEL_MASK(30, 30)
#define GEN7_SURFACE_CLEAR_COLOR_SHIFT 28
#define GEN7_SURFACE_SCS_R_SHIFT 25
#define GEN7_SURFACE_SCS_R_MASK INTEL_MASK(27, 25)
* instructions.
*/
FS_OPCODE_FB_WRITE = 128,
+
+ /**
+ * Same as FS_OPCODE_FB_WRITE but expects its arguments separately as
+ * individual sources instead of as a single payload blob:
+ *
+ * Source 0: [required] Color 0.
+ * Source 1: [optional] Color 1 (for dual source blend messages).
+ * Source 2: [optional] Src0 Alpha.
+ * Source 3: [optional] Source Depth (passthrough from the thread payload).
+ * Source 4: [optional] Destination Depth (gl_FragDepth).
+ * Source 5: [optional] Sample Mask (gl_SampleMask).
+ * Source 6: [required] Number of color components (as a UD immediate).
+ */
+ FS_OPCODE_FB_WRITE_LOGICAL,
+
FS_OPCODE_BLORP_FB_WRITE,
FS_OPCODE_REP_FB_WRITE,
SHADER_OPCODE_RCP,
SHADER_OPCODE_UNTYPED_ATOMIC,
SHADER_OPCODE_UNTYPED_SURFACE_READ,
+ SHADER_OPCODE_UNTYPED_SURFACE_WRITE,
+
+ SHADER_OPCODE_TYPED_ATOMIC,
+ SHADER_OPCODE_TYPED_SURFACE_READ,
+ SHADER_OPCODE_TYPED_SURFACE_WRITE,
+
+ SHADER_OPCODE_MEMORY_FENCE,
SHADER_OPCODE_GEN4_SCRATCH_READ,
SHADER_OPCODE_GEN4_SCRATCH_WRITE,
SHADER_OPCODE_URB_WRITE_SIMD8,
+ /**
+ * Return the index of an arbitrary live channel (i.e. one of the channels
+ * enabled in the current execution mask) and assign it to the first
+ * component of the destination. Expected to be used as input for the
+ * BROADCAST pseudo-opcode.
+ */
+ SHADER_OPCODE_FIND_LIVE_CHANNEL,
+
+ /**
+ * Pick the channel from its first source register given by the index
+ * specified as second source. Useful for variable indexing of surfaces.
+ */
+ SHADER_OPCODE_BROADCAST,
+
VEC4_OPCODE_MOV_BYTES,
VEC4_OPCODE_PACK_BYTES,
VEC4_OPCODE_UNPACK_UNIFORM,
* and number of SO primitives needed.
*/
GS_OPCODE_FF_SYNC_SET_PRIMITIVES,
+
+ /**
+ * Terminate the compute shader.
+ */
+ CS_OPCODE_CS_TERMINATE,
+
+ /**
+ * GLSL barrier()
+ */
+ SHADER_OPCODE_BARRIER,
};
enum brw_urb_write_flags {
#define BRW_SCRATCH_SPACE_SIZE_1M 10
#define BRW_SCRATCH_SPACE_SIZE_2M 11
+#define BRW_MESSAGE_GATEWAY_SFID_OPEN_GATEWAY 0
+#define BRW_MESSAGE_GATEWAY_SFID_CLOSE_GATEWAY 1
+#define BRW_MESSAGE_GATEWAY_SFID_FORWARD_MSG 2
+#define BRW_MESSAGE_GATEWAY_SFID_GET_TIMESTAMP 3
+#define BRW_MESSAGE_GATEWAY_SFID_BARRIER_MSG 4
+#define BRW_MESSAGE_GATEWAY_SFID_UPDATE_GATEWAY_STATE 5
+#define BRW_MESSAGE_GATEWAY_SFID_MMIO_READ_WRITE 6
+
#define CMD_URB_FENCE 0x6000
#define CMD_CS_URB_STATE 0x6001
#define _3DSTATE_BINDING_TABLE_POINTERS_GS 0x7829 /* GEN7+ */
#define _3DSTATE_BINDING_TABLE_POINTERS_PS 0x782A /* GEN7+ */
+#define _3DSTATE_BINDING_TABLE_POOL_ALLOC 0x7919 /* GEN7.5+ */
+#define BRW_HW_BINDING_TABLE_ENABLE (1 << 11)
+#define GEN7_HW_BT_POOL_MOCS_SHIFT 7
+#define GEN7_HW_BT_POOL_MOCS_MASK INTEL_MASK(10, 7)
+#define GEN8_HW_BT_POOL_MOCS_SHIFT 0
+#define GEN8_HW_BT_POOL_MOCS_MASK INTEL_MASK(6, 0)
+/* Only required in HSW */
+#define HSW_BT_POOL_ALLOC_MUST_BE_ONE (3 << 5)
+
+#define _3DSTATE_BINDING_TABLE_EDIT_VS 0x7843 /* GEN7.5 */
+#define _3DSTATE_BINDING_TABLE_EDIT_GS 0x7844 /* GEN7.5 */
+#define _3DSTATE_BINDING_TABLE_EDIT_HS 0x7845 /* GEN7.5 */
+#define _3DSTATE_BINDING_TABLE_EDIT_DS 0x7846 /* GEN7.5 */
+#define _3DSTATE_BINDING_TABLE_EDIT_PS 0x7847 /* GEN7.5 */
+#define BRW_BINDING_TABLE_INDEX_SHIFT 16
+#define BRW_BINDING_TABLE_INDEX_MASK INTEL_MASK(23, 16)
+
+#define BRW_BINDING_TABLE_EDIT_TARGET_ALL 3
+#define BRW_BINDING_TABLE_EDIT_TARGET_CORE1 2
+#define BRW_BINDING_TABLE_EDIT_TARGET_CORE0 1
+/* In HSW, when editing binding table entries to surface state offsets,
+ * the surface state offset is a 16-bit value aligned to 32 bytes. But
+ * Surface State Pointer in dword 2 is [15:0]. Right shift surf_offset
+ * by 5 bits so it won't disturb bit 16 (which is used as the binding
+ * table index entry), otherwise it would hang the GPU.
+ */
+#define HSW_SURFACE_STATE_EDIT(value) (value >> 5)
+/* Same as Haswell, but surface state offsets now aligned to 64 bytes.*/
+#define GEN8_SURFACE_STATE_EDIT(value) (value >> 6)
+
#define _3DSTATE_SAMPLER_STATE_POINTERS 0x7802 /* GEN6+ */
# define PS_SAMPLER_STATE_CHANGE (1 << 12)
# define GS_SAMPLER_STATE_CHANGE (1 << 9)
# define GEN6_CC_VIEWPORT_MODIFY (1 << 12)
# define GEN6_SF_VIEWPORT_MODIFY (1 << 11)
# define GEN6_CLIP_VIEWPORT_MODIFY (1 << 10)
-# define GEN7_NUM_VIEWPORTS 16
+# define GEN6_NUM_VIEWPORTS 16
#define _3DSTATE_VIEWPORT_STATE_POINTERS_CC 0x7823 /* GEN7+ */
#define _3DSTATE_VIEWPORT_STATE_POINTERS_SF_CL 0x7821 /* GEN7+ */
# define GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID 1
# define GEN7_GS_CONTROL_DATA_HEADER_SIZE_SHIFT 20
# define GEN7_GS_INSTANCE_CONTROL_SHIFT 15
-# define GEN7_GS_DISPATCH_MODE_SINGLE (0 << 11)
-# define GEN7_GS_DISPATCH_MODE_DUAL_INSTANCE (1 << 11)
-# define GEN7_GS_DISPATCH_MODE_DUAL_OBJECT (2 << 11)
+# define GEN7_GS_DISPATCH_MODE_SHIFT 11
+# define GEN7_GS_DISPATCH_MODE_MASK INTEL_MASK(12, 11)
# define GEN6_GS_STATISTICS_ENABLE (1 << 10)
# define GEN6_GS_SO_STATISTICS_ENABLE (1 << 9)
# define GEN6_GS_RENDERING_ENABLE (1 << 8)
# define GEN8_PSX_SHADER_DISABLES_ALPHA_TO_COVERAGE (1 << 7)
# define GEN8_PSX_SHADER_IS_PER_SAMPLE (1 << 6)
# define GEN8_PSX_SHADER_COMPUTES_STENCIL (1 << 5)
+# define GEN9_PSX_SHADER_PULLS_BARY (1 << 3)
# define GEN8_PSX_SHADER_HAS_UAV (1 << 2)
# define GEN8_PSX_SHADER_USES_INPUT_COVERAGE_MASK (1 << 1)
# define GEN7_PS_SPF_MODE (1 << 31)
# define GEN7_PS_VECTOR_MASK_ENABLE (1 << 30)
# define GEN7_PS_SAMPLER_COUNT_SHIFT 27
+# define GEN7_PS_SAMPLER_COUNT_MASK INTEL_MASK(29, 27)
# define GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT 18
# define GEN7_PS_FLOATING_POINT_MODE_IEEE_754 (0 << 16)
# define GEN7_PS_FLOATING_POINT_MODE_ALT (1 << 16)
#define BDW_MOCS_WT 0x58
#define BDW_MOCS_PTE 0x18
-/* Skylake: MOCS is now an index into an array of 64 different configurable
- * cache settings. We still use only either write-back or write-through; and
- * rely on the documented default values.
+/* Skylake: MOCS is now an index into an array of 62 different caching
+ * configurations programmed by the kernel.
*/
-#define SKL_MOCS_WB 9
-#define SKL_MOCS_WT 5
+/* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */
+#define SKL_MOCS_WB (2 << 1)
+/* TC=LLC/eLLC, LeCC=PTE, LRUM=3, L3CC=WB */
+#define SKL_MOCS_PTE (1 << 1)
+
+#define MEDIA_VFE_STATE 0x7000
+/* GEN7 DW2, GEN8+ DW3 */
+# define MEDIA_VFE_STATE_MAX_THREADS_SHIFT 16
+# define MEDIA_VFE_STATE_MAX_THREADS_MASK INTEL_MASK(31, 16)
+# define MEDIA_VFE_STATE_URB_ENTRIES_SHIFT 8
+# define MEDIA_VFE_STATE_URB_ENTRIES_MASK INTEL_MASK(15, 8)
+# define MEDIA_VFE_STATE_RESET_GTW_TIMER_SHIFT 7
+# define MEDIA_VFE_STATE_RESET_GTW_TIMER_MASK INTEL_MASK(7, 7)
+# define MEDIA_VFE_STATE_BYPASS_GTW_SHIFT 6
+# define MEDIA_VFE_STATE_BYPASS_GTW_MASK INTEL_MASK(6, 6)
+# define GEN7_MEDIA_VFE_STATE_GPGPU_MODE_SHIFT 2
+# define GEN7_MEDIA_VFE_STATE_GPGPU_MODE_MASK INTEL_MASK(2, 2)
+/* GEN7 DW4, GEN8+ DW5 */
+# define MEDIA_VFE_STATE_URB_ALLOC_SHIFT 16
+# define MEDIA_VFE_STATE_URB_ALLOC_MASK INTEL_MASK(31, 16)
+# define MEDIA_VFE_STATE_CURBE_ALLOC_SHIFT 0
+# define MEDIA_VFE_STATE_CURBE_ALLOC_MASK INTEL_MASK(15, 0)
+
+#define MEDIA_INTERFACE_DESCRIPTOR_LOAD 0x7002
+/* GEN7 DW5, GEN8+ DW6 */
+# define MEDIA_GPGPU_THREAD_COUNT_SHIFT 0
+# define MEDIA_GPGPU_THREAD_COUNT_MASK INTEL_MASK(7, 0)
+# define GEN8_MEDIA_GPGPU_THREAD_COUNT_SHIFT 0
+# define GEN8_MEDIA_GPGPU_THREAD_COUNT_MASK INTEL_MASK(9, 0)
+#define MEDIA_STATE_FLUSH 0x7004
+#define GPGPU_WALKER 0x7105
+/* GEN8+ DW2 */
+# define GPGPU_WALKER_INDIRECT_LENGTH_SHIFT 0
+# define GPGPU_WALKER_INDIRECT_LENGTH_MASK INTEL_MASK(15, 0)
+/* GEN7 DW2, GEN8+ DW4 */
+# define GPGPU_WALKER_SIMD_SIZE_SHIFT 30
+# define GPGPU_WALKER_SIMD_SIZE_MASK INTEL_MASK(31, 30)
+# define GPGPU_WALKER_THREAD_DEPTH_MAX_SHIFT 16
+# define GPGPU_WALKER_THREAD_DEPTH_MAX_MASK INTEL_MASK(21, 16)
+# define GPGPU_WALKER_THREAD_HEIGHT_MAX_SHIFT 8
+# define GPGPU_WALKER_THREAD_HEIGHT_MAX_MASK INTEL_MASK(31, 8)
+# define GPGPU_WALKER_THREAD_WIDTH_MAX_SHIFT 0
+# define GPGPU_WALKER_THREAD_WIDTH_MAX_MASK INTEL_MASK(5, 0)
#endif