X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fdrivers%2Fdri%2Fi965%2Fbrw_defines.h;h=9f8d7337047ed522df14f1aabd238a1cf253001e;hb=a9f31a032b0a1068a4e2ceed9ed4680ecf13e28b;hp=a3fe2dd04cd07d1dcc3368f17bb26efa5c6787da;hpb=20d9286f71253004a91acbcf4c257e84ee7df077;p=mesa.git diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index a3fe2dd04cd..9f8d7337047 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -38,6 +38,7 @@ fieldval & field ## _MASK; \ }) +#define GET_BITS(data, high, low) ((data & INTEL_MASK((high), (low))) >> (low)) #define GET_FIELD(word, field) (((word) & field ## _MASK) >> field ## _SHIFT) #ifndef BRW_DEFINES_H @@ -51,6 +52,7 @@ # define GEN4_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL (0 << 15) # define GEN4_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM (1 << 15) # define GEN7_3DPRIM_INDIRECT_PARAMETER_ENABLE (1 << 10) +# define GEN7_3DPRIM_PREDICATE_ENABLE (1 << 8) /* DW1 */ # define GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL (0 << 8) # define GEN7_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM (1 << 8) @@ -77,6 +79,13 @@ #define _3DPRIM_LINESTRIP_CONT_BF 0x14 #define _3DPRIM_TRIFAN_NOSTIPPLE 0x15 +/* We use this offset to be able to pass native primitive types in struct + * _mesa_prim::mode. Native primitive types are BRW_PRIM_OFFSET + + * native_type, which should be different from all GL types and still fit in + * the 8 bits available. 
*/ + +#define BRW_PRIM_OFFSET 0x80 + #define BRW_ANISORATIO_2 0 #define BRW_ANISORATIO_4 1 #define BRW_ANISORATIO_6 2 @@ -256,6 +265,17 @@ #define BRW_STENCILOP_INVERT 7 /* Surface state DW0 */ +#define GEN8_SURFACE_IS_ARRAY (1 << 28) +#define GEN8_SURFACE_VALIGN_4 (1 << 16) +#define GEN8_SURFACE_VALIGN_8 (2 << 16) +#define GEN8_SURFACE_VALIGN_16 (3 << 16) +#define GEN8_SURFACE_HALIGN_4 (1 << 14) +#define GEN8_SURFACE_HALIGN_8 (2 << 14) +#define GEN8_SURFACE_HALIGN_16 (3 << 14) +#define GEN8_SURFACE_TILING_NONE (0 << 12) +#define GEN8_SURFACE_TILING_W (1 << 12) +#define GEN8_SURFACE_TILING_X (2 << 12) +#define GEN8_SURFACE_TILING_Y (3 << 12) #define BRW_SURFACE_RC_READ_WRITE (1 << 8) #define BRW_SURFACE_MIPLAYOUT_SHIFT 10 #define BRW_SURFACE_MIPMAPLAYOUT_BELOW 0 @@ -510,6 +530,12 @@ #define GEN7_SURFACE_ARYSPC_FULL (0 << 10) #define GEN7_SURFACE_ARYSPC_LOD0 (1 << 10) +/* Surface state DW1 */ +#define GEN8_SURFACE_MOCS_SHIFT 24 +#define GEN8_SURFACE_MOCS_MASK INTEL_MASK(30, 24) +#define GEN8_SURFACE_QPITCH_SHIFT 0 +#define GEN8_SURFACE_QPITCH_MASK INTEL_MASK(14, 0) + /* Surface state DW2 */ #define BRW_SURFACE_HEIGHT_SHIFT 19 #define BRW_SURFACE_HEIGHT_MASK INTEL_MASK(31, 19) @@ -529,15 +555,22 @@ #define BRW_SURFACE_PITCH_MASK INTEL_MASK(19, 3) #define BRW_SURFACE_TILED (1 << 1) #define BRW_SURFACE_TILED_Y (1 << 0) +#define HSW_SURFACE_IS_INTEGER_FORMAT (1 << 18) /* Surface state DW4 */ #define BRW_SURFACE_MIN_LOD_SHIFT 28 #define BRW_SURFACE_MIN_LOD_MASK INTEL_MASK(31, 28) +#define BRW_SURFACE_MIN_ARRAY_ELEMENT_SHIFT 17 +#define BRW_SURFACE_MIN_ARRAY_ELEMENT_MASK INTEL_MASK(27, 17) +#define BRW_SURFACE_RENDER_TARGET_VIEW_EXTENT_SHIFT 8 +#define BRW_SURFACE_RENDER_TARGET_VIEW_EXTENT_MASK INTEL_MASK(16, 8) #define BRW_SURFACE_MULTISAMPLECOUNT_1 (0 << 4) #define BRW_SURFACE_MULTISAMPLECOUNT_4 (2 << 4) #define GEN7_SURFACE_MULTISAMPLECOUNT_1 (0 << 3) +#define GEN8_SURFACE_MULTISAMPLECOUNT_2 (1 << 3) #define GEN7_SURFACE_MULTISAMPLECOUNT_4 (2 << 3) #define 
GEN7_SURFACE_MULTISAMPLECOUNT_8 (3 << 3) +#define GEN8_SURFACE_MULTISAMPLECOUNT_16 (4 << 3) #define GEN7_SURFACE_MSFMT_MSS (0 << 6) #define GEN7_SURFACE_MSFMT_DEPTH_STENCIL (1 << 6) #define GEN7_SURFACE_MIN_ARRAY_ELEMENT_SHIFT 18 @@ -553,16 +586,39 @@ #define BRW_SURFACE_Y_OFFSET_MASK INTEL_MASK(23, 20) #define GEN7_SURFACE_MIN_LOD_SHIFT 4 #define GEN7_SURFACE_MIN_LOD_MASK INTEL_MASK(7, 4) +#define GEN8_SURFACE_Y_OFFSET_SHIFT 21 +#define GEN8_SURFACE_Y_OFFSET_MASK INTEL_MASK(23, 21) #define GEN7_SURFACE_MOCS_SHIFT 16 #define GEN7_SURFACE_MOCS_MASK INTEL_MASK(19, 16) +#define GEN9_SURFACE_TRMODE_SHIFT 18 +#define GEN9_SURFACE_TRMODE_MASK INTEL_MASK(19, 18) +#define GEN9_SURFACE_TRMODE_NONE 0 +#define GEN9_SURFACE_TRMODE_TILEYF 1 +#define GEN9_SURFACE_TRMODE_TILEYS 2 + +#define GEN9_SURFACE_MIP_TAIL_START_LOD_SHIFT 8 +#define GEN9_SURFACE_MIP_TAIL_START_LOD_MASK INTEL_MASK(11, 8) + /* Surface state DW6 */ #define GEN7_SURFACE_MCS_ENABLE (1 << 0) #define GEN7_SURFACE_MCS_PITCH_SHIFT 3 #define GEN7_SURFACE_MCS_PITCH_MASK INTEL_MASK(11, 3) +#define GEN8_SURFACE_AUX_QPITCH_SHIFT 16 +#define GEN8_SURFACE_AUX_QPITCH_MASK INTEL_MASK(30, 16) +#define GEN8_SURFACE_AUX_PITCH_SHIFT 3 +#define GEN8_SURFACE_AUX_PITCH_MASK INTEL_MASK(11, 3) +#define GEN8_SURFACE_AUX_MODE_MASK INTEL_MASK(2, 0) + +#define GEN8_SURFACE_AUX_MODE_NONE 0 +#define GEN8_SURFACE_AUX_MODE_MCS 1 +#define GEN8_SURFACE_AUX_MODE_APPEND 2 +#define GEN8_SURFACE_AUX_MODE_HIZ 3 /* Surface state DW7 */ +#define GEN9_SURFACE_RT_COMPRESSION_SHIFT 30 +#define GEN9_SURFACE_RT_COMPRESSION_MASK INTEL_MASK(30, 30) #define GEN7_SURFACE_CLEAR_COLOR_SHIFT 28 #define GEN7_SURFACE_SCS_R_SHIFT 25 #define GEN7_SURFACE_SCS_R_MASK INTEL_MASK(27, 25) @@ -581,12 +637,69 @@ #define HSW_SCS_BLUE 6 #define HSW_SCS_ALPHA 7 -#define BRW_TEXCOORDMODE_WRAP 0 -#define BRW_TEXCOORDMODE_MIRROR 1 -#define BRW_TEXCOORDMODE_CLAMP 2 -#define BRW_TEXCOORDMODE_CUBE 3 -#define BRW_TEXCOORDMODE_CLAMP_BORDER 4 -#define BRW_TEXCOORDMODE_MIRROR_ONCE 5 
+/* SAMPLER_STATE DW0 */ +#define BRW_SAMPLER_DISABLE (1 << 31) +#define BRW_SAMPLER_LOD_PRECLAMP_ENABLE (1 << 28) +#define GEN6_SAMPLER_MIN_MAG_NOT_EQUAL (1 << 27) /* Gen6 only */ +#define BRW_SAMPLER_BASE_MIPLEVEL_MASK INTEL_MASK(26, 22) +#define BRW_SAMPLER_BASE_MIPLEVEL_SHIFT 22 +#define BRW_SAMPLER_MIP_FILTER_MASK INTEL_MASK(21, 20) +#define BRW_SAMPLER_MIP_FILTER_SHIFT 20 +#define BRW_SAMPLER_MAG_FILTER_MASK INTEL_MASK(19, 17) +#define BRW_SAMPLER_MAG_FILTER_SHIFT 17 +#define BRW_SAMPLER_MIN_FILTER_MASK INTEL_MASK(16, 14) +#define BRW_SAMPLER_MIN_FILTER_SHIFT 14 +#define GEN4_SAMPLER_LOD_BIAS_MASK INTEL_MASK(13, 3) +#define GEN4_SAMPLER_LOD_BIAS_SHIFT 3 +#define GEN4_SAMPLER_SHADOW_FUNCTION_MASK INTEL_MASK(2, 0) +#define GEN4_SAMPLER_SHADOW_FUNCTION_SHIFT 0 + +#define GEN7_SAMPLER_LOD_BIAS_MASK INTEL_MASK(13, 1) +#define GEN7_SAMPLER_LOD_BIAS_SHIFT 1 +#define GEN7_SAMPLER_EWA_ANISOTROPIC_ALGORITHM (1 << 0) + +/* SAMPLER_STATE DW1 */ +#define GEN4_SAMPLER_MIN_LOD_MASK INTEL_MASK(31, 22) +#define GEN4_SAMPLER_MIN_LOD_SHIFT 22 +#define GEN4_SAMPLER_MAX_LOD_MASK INTEL_MASK(21, 12) +#define GEN4_SAMPLER_MAX_LOD_SHIFT 12 +#define GEN4_SAMPLER_CUBE_CONTROL_OVERRIDE (1 << 9) +/* Wrap modes are in DW1 on Gen4-6 and DW3 on Gen7+ */ +#define BRW_SAMPLER_TCX_WRAP_MODE_MASK INTEL_MASK(8, 6) +#define BRW_SAMPLER_TCX_WRAP_MODE_SHIFT 6 +#define BRW_SAMPLER_TCY_WRAP_MODE_MASK INTEL_MASK(5, 3) +#define BRW_SAMPLER_TCY_WRAP_MODE_SHIFT 3 +#define BRW_SAMPLER_TCZ_WRAP_MODE_MASK INTEL_MASK(2, 0) +#define BRW_SAMPLER_TCZ_WRAP_MODE_SHIFT 0 + +#define GEN7_SAMPLER_MIN_LOD_MASK INTEL_MASK(31, 20) +#define GEN7_SAMPLER_MIN_LOD_SHIFT 20 +#define GEN7_SAMPLER_MAX_LOD_MASK INTEL_MASK(19, 8) +#define GEN7_SAMPLER_MAX_LOD_SHIFT 8 +#define GEN7_SAMPLER_SHADOW_FUNCTION_MASK INTEL_MASK(3, 1) +#define GEN7_SAMPLER_SHADOW_FUNCTION_SHIFT 1 +#define GEN7_SAMPLER_CUBE_CONTROL_OVERRIDE (1 << 0) + +/* SAMPLER_STATE DW2 - border color pointer */ + +/* SAMPLER_STATE DW3 */ +#define 
BRW_SAMPLER_MAX_ANISOTROPY_MASK INTEL_MASK(21, 19) +#define BRW_SAMPLER_MAX_ANISOTROPY_SHIFT 19 +#define BRW_SAMPLER_ADDRESS_ROUNDING_MASK INTEL_MASK(18, 13) +#define BRW_SAMPLER_ADDRESS_ROUNDING_SHIFT 13 +#define GEN7_SAMPLER_NON_NORMALIZED_COORDINATES (1 << 10) +/* Gen7+ wrap modes reuse the same BRW_SAMPLER_TC*_WRAP_MODE enums. */ +#define GEN6_SAMPLER_NON_NORMALIZED_COORDINATES (1 << 0) + +enum brw_wrap_mode { + BRW_TEXCOORDMODE_WRAP = 0, + BRW_TEXCOORDMODE_MIRROR = 1, + BRW_TEXCOORDMODE_CLAMP = 2, + BRW_TEXCOORDMODE_CUBE = 3, + BRW_TEXCOORDMODE_CLAMP_BORDER = 4, + BRW_TEXCOORDMODE_MIRROR_ONCE = 5, + GEN8_TEXCOORDMODE_HALF_BORDER = 6, +}; #define BRW_THREAD_PRIORITY_NORMAL 0 #define BRW_THREAD_PRIORITY_HIGH 1 @@ -624,18 +737,20 @@ enum brw_compression { #define GEN6_COMPRESSION_1H 0 #define GEN6_COMPRESSION_2H 2 -#define BRW_CONDITIONAL_NONE 0 -#define BRW_CONDITIONAL_Z 1 -#define BRW_CONDITIONAL_NZ 2 -#define BRW_CONDITIONAL_EQ 1 /* Z */ -#define BRW_CONDITIONAL_NEQ 2 /* NZ */ -#define BRW_CONDITIONAL_G 3 -#define BRW_CONDITIONAL_GE 4 -#define BRW_CONDITIONAL_L 5 -#define BRW_CONDITIONAL_LE 6 -#define BRW_CONDITIONAL_R 7 -#define BRW_CONDITIONAL_O 8 -#define BRW_CONDITIONAL_U 9 +enum PACKED brw_conditional_mod { + BRW_CONDITIONAL_NONE = 0, + BRW_CONDITIONAL_Z = 1, + BRW_CONDITIONAL_NZ = 2, + BRW_CONDITIONAL_EQ = 1, /* Z */ + BRW_CONDITIONAL_NEQ = 2, /* NZ */ + BRW_CONDITIONAL_G = 3, + BRW_CONDITIONAL_GE = 4, + BRW_CONDITIONAL_L = 5, + BRW_CONDITIONAL_LE = 6, + BRW_CONDITIONAL_R = 7, /* Gen <= 5 */ + BRW_CONDITIONAL_O = 8, + BRW_CONDITIONAL_U = 9, +}; #define BRW_DEBUG_NONE 0 #define BRW_DEBUG_BREAKPOINT 1 @@ -645,17 +760,21 @@ enum brw_compression { #define BRW_DEPENDENCY_NOTCHECKED 2 #define BRW_DEPENDENCY_DISABLE 3 -#define BRW_EXECUTE_1 0 -#define BRW_EXECUTE_2 1 -#define BRW_EXECUTE_4 2 -#define BRW_EXECUTE_8 3 -#define BRW_EXECUTE_16 4 -#define BRW_EXECUTE_32 5 +enum PACKED brw_execution_size { + BRW_EXECUTE_1 = 0, + BRW_EXECUTE_2 = 1, + BRW_EXECUTE_4 = 
2, + BRW_EXECUTE_8 = 3, + BRW_EXECUTE_16 = 4, + BRW_EXECUTE_32 = 5, +}; -#define BRW_HORIZONTAL_STRIDE_0 0 -#define BRW_HORIZONTAL_STRIDE_1 1 -#define BRW_HORIZONTAL_STRIDE_2 2 -#define BRW_HORIZONTAL_STRIDE_4 3 +enum PACKED brw_horizontal_stride { + BRW_HORIZONTAL_STRIDE_0 = 0, + BRW_HORIZONTAL_STRIDE_1 = 1, + BRW_HORIZONTAL_STRIDE_2 = 2, + BRW_HORIZONTAL_STRIDE_4 = 3, +}; #define BRW_INSTRUCTION_NORMAL 0 #define BRW_INSTRUCTION_SATURATE 1 @@ -697,15 +816,16 @@ enum opcode { BRW_OPCODE_ASR = 12, BRW_OPCODE_CMP = 16, BRW_OPCODE_CMPN = 17, - BRW_OPCODE_F32TO16 = 19, - BRW_OPCODE_F16TO32 = 20, - BRW_OPCODE_BFREV = 23, - BRW_OPCODE_BFE = 24, - BRW_OPCODE_BFI1 = 25, - BRW_OPCODE_BFI2 = 26, + BRW_OPCODE_CSEL = 18, /**< Gen8+ */ + BRW_OPCODE_F32TO16 = 19, /**< Gen7 only */ + BRW_OPCODE_F16TO32 = 20, /**< Gen7 only */ + BRW_OPCODE_BFREV = 23, /**< Gen7+ */ + BRW_OPCODE_BFE = 24, /**< Gen7+ */ + BRW_OPCODE_BFI1 = 25, /**< Gen7+ */ + BRW_OPCODE_BFI2 = 26, /**< Gen7+ */ BRW_OPCODE_JMPI = 32, BRW_OPCODE_IF = 34, - BRW_OPCODE_IFF = 35, + BRW_OPCODE_IFF = 35, /**< Pre-Gen6 */ BRW_OPCODE_ELSE = 36, BRW_OPCODE_ENDIF = 37, BRW_OPCODE_DO = 38, @@ -713,14 +833,15 @@ enum opcode { BRW_OPCODE_BREAK = 40, BRW_OPCODE_CONTINUE = 41, BRW_OPCODE_HALT = 42, - BRW_OPCODE_MSAVE = 44, - BRW_OPCODE_MRESTORE = 45, - BRW_OPCODE_PUSH = 46, - BRW_OPCODE_POP = 47, + BRW_OPCODE_MSAVE = 44, /**< Pre-Gen6 */ + BRW_OPCODE_MRESTORE = 45, /**< Pre-Gen6 */ + BRW_OPCODE_PUSH = 46, /**< Pre-Gen6 */ + BRW_OPCODE_GOTO = 46, /**< Gen8+ */ + BRW_OPCODE_POP = 47, /**< Pre-Gen6 */ BRW_OPCODE_WAIT = 48, BRW_OPCODE_SEND = 49, BRW_OPCODE_SENDC = 50, - BRW_OPCODE_MATH = 56, + BRW_OPCODE_MATH = 56, /**< Gen6+ */ BRW_OPCODE_ADD = 64, BRW_OPCODE_MUL = 65, BRW_OPCODE_AVG = 66, @@ -732,11 +853,11 @@ enum opcode { BRW_OPCODE_MAC = 72, BRW_OPCODE_MACH = 73, BRW_OPCODE_LZD = 74, - BRW_OPCODE_FBH = 75, - BRW_OPCODE_FBL = 76, - BRW_OPCODE_CBIT = 77, - BRW_OPCODE_ADDC = 78, - BRW_OPCODE_SUBB = 79, + BRW_OPCODE_FBH = 75, /**< 
Gen7+ */ + BRW_OPCODE_FBL = 76, /**< Gen7+ */ + BRW_OPCODE_CBIT = 77, /**< Gen7+ */ + BRW_OPCODE_ADDC = 78, /**< Gen7+ */ + BRW_OPCODE_SUBB = 79, /**< Gen7+ */ BRW_OPCODE_SAD2 = 80, BRW_OPCODE_SADA2 = 81, BRW_OPCODE_DP4 = 84, @@ -744,16 +865,33 @@ enum opcode { BRW_OPCODE_DP3 = 86, BRW_OPCODE_DP2 = 87, BRW_OPCODE_LINE = 89, - BRW_OPCODE_PLN = 90, - BRW_OPCODE_MAD = 91, - BRW_OPCODE_LRP = 92, + BRW_OPCODE_PLN = 90, /**< G45+ */ + BRW_OPCODE_MAD = 91, /**< Gen6+ */ + BRW_OPCODE_LRP = 92, /**< Gen6+ */ + BRW_OPCODE_NENOP = 125, /**< G45 only */ BRW_OPCODE_NOP = 126, /* These are compiler backend opcodes that get translated into other * instructions. */ FS_OPCODE_FB_WRITE = 128, + + /** + * Same as FS_OPCODE_FB_WRITE but expects its arguments separately as + * individual sources instead of as a single payload blob: + * + * Source 0: [required] Color 0. + * Source 1: [optional] Color 1 (for dual source blend messages). + * Source 2: [optional] Src0 Alpha. + * Source 3: [optional] Source Depth (passthrough from the thread payload). + * Source 4: [optional] Destination Depth (gl_FragDepth). + * Source 5: [optional] Sample Mask (gl_SampleMask). + * Source 6: [required] Number of color components (as a UD immediate). + */ + FS_OPCODE_FB_WRITE_LOGICAL, + FS_OPCODE_BLORP_FB_WRITE, + FS_OPCODE_REP_FB_WRITE, SHADER_OPCODE_RCP, SHADER_OPCODE_RSQ, SHADER_OPCODE_SQRT, @@ -778,21 +916,68 @@ enum opcode { SHADER_OPCODE_TG4, SHADER_OPCODE_TG4_OFFSET, + /** + * Combines multiple sources of size 1 into a larger virtual GRF. + * For example, parameters for a send-from-GRF message. Or, updating + * channels of a size 4 VGRF used to store vec4s such as texturing results. + * + * This will be lowered into MOVs from each source to consecutive reg_offsets + * of the destination VGRF. + * + * src[0] may be BAD_FILE. If so, the lowering pass skips emitting the MOV, + * but still reserves the first channel of the destination VGRF. 
This can be + * used to reserve space for, say, a message header set up by the generators. + */ + SHADER_OPCODE_LOAD_PAYLOAD, + SHADER_OPCODE_SHADER_TIME_ADD, SHADER_OPCODE_UNTYPED_ATOMIC, SHADER_OPCODE_UNTYPED_SURFACE_READ, + SHADER_OPCODE_UNTYPED_SURFACE_WRITE, + + SHADER_OPCODE_TYPED_ATOMIC, + SHADER_OPCODE_TYPED_SURFACE_READ, + SHADER_OPCODE_TYPED_SURFACE_WRITE, + + SHADER_OPCODE_MEMORY_FENCE, SHADER_OPCODE_GEN4_SCRATCH_READ, SHADER_OPCODE_GEN4_SCRATCH_WRITE, SHADER_OPCODE_GEN7_SCRATCH_READ, - FS_OPCODE_DDX, - FS_OPCODE_DDY, - FS_OPCODE_PIXEL_X, - FS_OPCODE_PIXEL_Y, + SHADER_OPCODE_URB_WRITE_SIMD8, + + /** + * Return the index of an arbitrary live channel (i.e. one of the channels + * enabled in the current execution mask) and assign it to the first + * component of the destination. Expected to be used as input for the + * BROADCAST pseudo-opcode. + */ + SHADER_OPCODE_FIND_LIVE_CHANNEL, + + /** + * Pick the channel from its first source register given by the index + * specified as second source. Useful for variable indexing of surfaces. + */ + SHADER_OPCODE_BROADCAST, + + VEC4_OPCODE_MOV_BYTES, + VEC4_OPCODE_PACK_BYTES, + VEC4_OPCODE_UNPACK_UNIFORM, + + FS_OPCODE_DDX_COARSE, + FS_OPCODE_DDX_FINE, + /** + * Compute dFdy(), dFdyCoarse(), or dFdyFine(). + * src1 is an immediate storing the key->render_to_fbo boolean. 
+ */ + FS_OPCODE_DDY_COARSE, + FS_OPCODE_DDY_FINE, FS_OPCODE_CINTERP, FS_OPCODE_LINTERP, + FS_OPCODE_PIXEL_X, + FS_OPCODE_PIXEL_Y, FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7, FS_OPCODE_VARYING_PULL_CONSTANT_LOAD, @@ -806,10 +991,15 @@ enum opcode { FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X, FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y, FS_OPCODE_PLACEHOLDER_HALT, + FS_OPCODE_INTERPOLATE_AT_CENTROID, + FS_OPCODE_INTERPOLATE_AT_SAMPLE, + FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET, + FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET, VS_OPCODE_URB_WRITE, VS_OPCODE_PULL_CONSTANT_LOAD, VS_OPCODE_PULL_CONSTANT_LOAD_GEN7, + VS_OPCODE_SET_SIMD4X2_HEADER_GEN9, VS_OPCODE_UNPACK_FLAGS_SIMD4X2, /** @@ -821,6 +1011,14 @@ enum opcode { */ GS_OPCODE_URB_WRITE, + /** + * Write geometry shader output data to the URB and request a new URB + * handle (gen6). + * + * This opcode doesn't do an implied move from R0 to the first MRF. + */ + GS_OPCODE_URB_WRITE_ALLOCATE, + /** * Terminate the geometry shader thread by doing an empty URB write. * @@ -857,11 +1055,9 @@ enum opcode { GS_OPCODE_SET_VERTEX_COUNT, /** - * Set DWORD 2 of dst to the immediate value in src. Used by geometry - * shaders to initialize DWORD 2 of R0, which needs to be 0 in order for - * scratch reads and writes to operate correctly. + * Set DWORD 2 of dst to the value in src. */ - GS_OPCODE_SET_DWORD_2_IMMED, + GS_OPCODE_SET_DWORD_2, /** * Prepare the dst register for storage in the "Channel Mask" fields of a @@ -888,6 +1084,86 @@ enum opcode { * form the final channel mask. */ GS_OPCODE_SET_CHANNEL_MASKS, + + /** + * Get the "Instance ID" fields from the payload. + * + * - dst is the GRF for gl_InvocationID. + */ + GS_OPCODE_GET_INSTANCE_ID, + + /** + * Send a FF_SYNC message to allocate initial URB handles (gen6). + * + * - dst will be used as the writeback register for the FF_SYNC operation. + * + * - src0 is the number of primitives written. 
+ * + * - src1 is the value to hold in M0.0: number of SO vertices to write + * and number of SO primitives needed. Its value will be overwritten + * with the SVBI values if transform feedback is enabled. + * + * Note: This opcode uses an implicit MRF register for the ff_sync message + * header, so the caller is expected to set inst->base_mrf and initialize + * that MRF register to r0. This opcode will also write to this MRF register + * to include the allocated URB handle so it can then be reused directly as + * the header in the URB write operation we are allocating the handle for. + */ + GS_OPCODE_FF_SYNC, + + /** + * Move r0.1 (which holds PrimitiveID information in gen6) to a separate + * register. + * + * - dst is the GRF where PrimitiveID information will be moved. + */ + GS_OPCODE_SET_PRIMITIVE_ID, + + /** + * Write transform feedback data to the SVB by sending a SVB WRITE message. + * Used in gen6. + * + * - dst is the MRF register containing the message header. + * + * - src0 is the register where the vertex data is going to be copied from. + * + * - src1 is the destination register when write commit occurs. + */ + GS_OPCODE_SVB_WRITE, + + /** + * Set destination index in the SVB write message payload (M0.5). Used + * in gen6 for transform feedback. + * + * - dst is the header to save the destination indices for SVB WRITE. + * - src is the register that holds the destination indices value. + */ + GS_OPCODE_SVB_SET_DST_INDEX, + + /** + * Prepare Mx.0 subregister for being used in the FF_SYNC message header. + * Used in gen6 for transform feedback. + * + * - dst will hold the register with the final Mx.0 value. + * + * - src0 has the number of vertices emitted in SO (NumSOVertsToWrite) + * + * - src1 has the number of needed primitives for SO (NumSOPrimsNeeded) + * + * - src2 is the value to hold in M0: number of SO vertices to write + * and number of SO primitives needed. + */ + GS_OPCODE_FF_SYNC_SET_PRIMITIVES, + + /** + * Terminate the compute shader. 
+ */ + CS_OPCODE_CS_TERMINATE, + + /** + * GLSL barrier() + */ + SHADER_OPCODE_BARRIER, }; enum brw_urb_write_flags { @@ -961,24 +1237,28 @@ operator|(brw_urb_write_flags x, brw_urb_write_flags y) } #endif -#define BRW_PREDICATE_NONE 0 -#define BRW_PREDICATE_NORMAL 1 -#define BRW_PREDICATE_ALIGN1_ANYV 2 -#define BRW_PREDICATE_ALIGN1_ALLV 3 -#define BRW_PREDICATE_ALIGN1_ANY2H 4 -#define BRW_PREDICATE_ALIGN1_ALL2H 5 -#define BRW_PREDICATE_ALIGN1_ANY4H 6 -#define BRW_PREDICATE_ALIGN1_ALL4H 7 -#define BRW_PREDICATE_ALIGN1_ANY8H 8 -#define BRW_PREDICATE_ALIGN1_ALL8H 9 -#define BRW_PREDICATE_ALIGN1_ANY16H 10 -#define BRW_PREDICATE_ALIGN1_ALL16H 11 -#define BRW_PREDICATE_ALIGN16_REPLICATE_X 2 -#define BRW_PREDICATE_ALIGN16_REPLICATE_Y 3 -#define BRW_PREDICATE_ALIGN16_REPLICATE_Z 4 -#define BRW_PREDICATE_ALIGN16_REPLICATE_W 5 -#define BRW_PREDICATE_ALIGN16_ANY4H 6 -#define BRW_PREDICATE_ALIGN16_ALL4H 7 +enum PACKED brw_predicate { + BRW_PREDICATE_NONE = 0, + BRW_PREDICATE_NORMAL = 1, + BRW_PREDICATE_ALIGN1_ANYV = 2, + BRW_PREDICATE_ALIGN1_ALLV = 3, + BRW_PREDICATE_ALIGN1_ANY2H = 4, + BRW_PREDICATE_ALIGN1_ALL2H = 5, + BRW_PREDICATE_ALIGN1_ANY4H = 6, + BRW_PREDICATE_ALIGN1_ALL4H = 7, + BRW_PREDICATE_ALIGN1_ANY8H = 8, + BRW_PREDICATE_ALIGN1_ALL8H = 9, + BRW_PREDICATE_ALIGN1_ANY16H = 10, + BRW_PREDICATE_ALIGN1_ALL16H = 11, + BRW_PREDICATE_ALIGN1_ANY32H = 12, + BRW_PREDICATE_ALIGN1_ALL32H = 13, + BRW_PREDICATE_ALIGN16_REPLICATE_X = 2, + BRW_PREDICATE_ALIGN16_REPLICATE_Y = 3, + BRW_PREDICATE_ALIGN16_REPLICATE_Z = 4, + BRW_PREDICATE_ALIGN16_REPLICATE_W = 5, + BRW_PREDICATE_ALIGN16_ANY4H = 6, + BRW_PREDICATE_ALIGN16_ALL4H = 7, +}; #define BRW_ARCHITECTURE_REGISTER_FILE 0 #define BRW_GENERAL_REGISTER_FILE 1 @@ -1042,23 +1322,24 @@ operator|(brw_urb_write_flags x, brw_urb_write_flags y) #define BRW_THREAD_ATOMIC 1 #define BRW_THREAD_SWITCH 2 -#define BRW_VERTICAL_STRIDE_0 0 -#define BRW_VERTICAL_STRIDE_1 1 -#define BRW_VERTICAL_STRIDE_2 2 -#define BRW_VERTICAL_STRIDE_4 3 -#define 
BRW_VERTICAL_STRIDE_8 4 -#define BRW_VERTICAL_STRIDE_16 5 -#define BRW_VERTICAL_STRIDE_32 6 -#define BRW_VERTICAL_STRIDE_64 7 -#define BRW_VERTICAL_STRIDE_128 8 -#define BRW_VERTICAL_STRIDE_256 9 -#define BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL 0xF - -#define BRW_WIDTH_1 0 -#define BRW_WIDTH_2 1 -#define BRW_WIDTH_4 2 -#define BRW_WIDTH_8 3 -#define BRW_WIDTH_16 4 +enum PACKED brw_vertical_stride { + BRW_VERTICAL_STRIDE_0 = 0, + BRW_VERTICAL_STRIDE_1 = 1, + BRW_VERTICAL_STRIDE_2 = 2, + BRW_VERTICAL_STRIDE_4 = 3, + BRW_VERTICAL_STRIDE_8 = 4, + BRW_VERTICAL_STRIDE_16 = 5, + BRW_VERTICAL_STRIDE_32 = 6, + BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL = 0xF, +}; + +enum PACKED brw_width { + BRW_WIDTH_1 = 0, + BRW_WIDTH_2 = 1, + BRW_WIDTH_4 = 2, + BRW_WIDTH_8 = 3, + BRW_WIDTH_16 = 4, +}; #define BRW_STATELESS_BUFFER_BOUNDARY_1K 0 #define BRW_STATELESS_BUFFER_BOUNDARY_2K 1 @@ -1156,6 +1437,11 @@ enum brw_message_target { #define BRW_SAMPLER_SIMD_MODE_SIMD16 2 #define BRW_SAMPLER_SIMD_MODE_SIMD32_64 3 +/* GEN9 changes SIMD mode 0 to mean SIMD8D, but lets us get the SIMD4x2 + * behavior by setting bit 22 of dword 2 in the message header. 
*/ +#define GEN9_SAMPLER_SIMD_MODE_SIMD8D 0 +#define GEN9_SAMPLER_SIMD_MODE_EXTENSION_SIMD4X2 (1 << 22) + #define BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW 0 #define BRW_DATAPORT_OWORD_BLOCK_1_OWORDHIGH 1 #define BRW_DATAPORT_OWORD_BLOCK_2_OWORDS 2 @@ -1216,7 +1502,13 @@ enum brw_message_target { #define GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_UNORM_WRITE 14 /* GEN7 */ -#define GEN7_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE 10 +#define GEN7_DATAPORT_RC_MEDIA_BLOCK_READ 4 +#define GEN7_DATAPORT_RC_TYPED_SURFACE_READ 5 +#define GEN7_DATAPORT_RC_TYPED_ATOMIC_OP 6 +#define GEN7_DATAPORT_RC_MEMORY_FENCE 7 +#define GEN7_DATAPORT_RC_MEDIA_BLOCK_WRITE 10 +#define GEN7_DATAPORT_RC_RENDER_TARGET_WRITE 12 +#define GEN7_DATAPORT_RC_TYPED_SURFACE_WRITE 13 #define GEN7_DATAPORT_DC_OWORD_BLOCK_READ 0 #define GEN7_DATAPORT_DC_UNALIGNED_OWORD_BLOCK_READ 1 #define GEN7_DATAPORT_DC_OWORD_DUAL_BLOCK_READ 2 @@ -1237,6 +1529,11 @@ enum brw_message_target { (1 << 17)) #define GEN7_DATAPORT_SCRATCH_NUM_REGS_SHIFT 12 +#define GEN7_PIXEL_INTERPOLATOR_LOC_SHARED_OFFSET 0 +#define GEN7_PIXEL_INTERPOLATOR_LOC_SAMPLE 1 +#define GEN7_PIXEL_INTERPOLATOR_LOC_CENTROID 2 +#define GEN7_PIXEL_INTERPOLATOR_LOC_PER_SLOT_OFFSET 3 + /* HSW */ #define HSW_DATAPORT_DC_PORT0_OWORD_BLOCK_READ 0 #define HSW_DATAPORT_DC_PORT0_UNALIGNED_OWORD_BLOCK_READ 1 @@ -1309,6 +1606,7 @@ enum brw_message_target { #define BRW_URB_OPCODE_WRITE_HWORD 0 #define BRW_URB_OPCODE_WRITE_OWORD 1 +#define GEN8_URB_OPCODE_SIMD8_WRITE 7 #define BRW_URB_SWIZZLE_NONE 0 #define BRW_URB_SWIZZLE_INTERLEAVE 1 @@ -1327,6 +1625,14 @@ enum brw_message_target { #define BRW_SCRATCH_SPACE_SIZE_1M 10 #define BRW_SCRATCH_SPACE_SIZE_2M 11 +#define BRW_MESSAGE_GATEWAY_SFID_OPEN_GATEWAY 0 +#define BRW_MESSAGE_GATEWAY_SFID_CLOSE_GATEWAY 1 +#define BRW_MESSAGE_GATEWAY_SFID_FORWARD_MSG 2 +#define BRW_MESSAGE_GATEWAY_SFID_GET_TIMESTAMP 3 +#define BRW_MESSAGE_GATEWAY_SFID_BARRIER_MSG 4 +#define BRW_MESSAGE_GATEWAY_SFID_UPDATE_GATEWAY_STATE 5 +#define 
BRW_MESSAGE_GATEWAY_SFID_MMIO_READ_WRITE 6 + #define CMD_URB_FENCE 0x6000 #define CMD_CS_URB_STATE 0x6001 @@ -1349,6 +1655,36 @@ enum brw_message_target { #define _3DSTATE_BINDING_TABLE_POINTERS_GS 0x7829 /* GEN7+ */ #define _3DSTATE_BINDING_TABLE_POINTERS_PS 0x782A /* GEN7+ */ +#define _3DSTATE_BINDING_TABLE_POOL_ALLOC 0x7919 /* GEN7.5+ */ +#define BRW_HW_BINDING_TABLE_ENABLE (1 << 11) +#define GEN7_HW_BT_POOL_MOCS_SHIFT 7 +#define GEN7_HW_BT_POOL_MOCS_MASK INTEL_MASK(10, 7) +#define GEN8_HW_BT_POOL_MOCS_SHIFT 0 +#define GEN8_HW_BT_POOL_MOCS_MASK INTEL_MASK(6, 0) +/* Only required in HSW */ +#define HSW_BT_POOL_ALLOC_MUST_BE_ONE (3 << 5) + +#define _3DSTATE_BINDING_TABLE_EDIT_VS 0x7843 /* GEN7.5 */ +#define _3DSTATE_BINDING_TABLE_EDIT_GS 0x7844 /* GEN7.5 */ +#define _3DSTATE_BINDING_TABLE_EDIT_HS 0x7845 /* GEN7.5 */ +#define _3DSTATE_BINDING_TABLE_EDIT_DS 0x7846 /* GEN7.5 */ +#define _3DSTATE_BINDING_TABLE_EDIT_PS 0x7847 /* GEN7.5 */ +#define BRW_BINDING_TABLE_INDEX_SHIFT 16 +#define BRW_BINDING_TABLE_INDEX_MASK INTEL_MASK(23, 16) + +#define BRW_BINDING_TABLE_EDIT_TARGET_ALL 3 +#define BRW_BINDING_TABLE_EDIT_TARGET_CORE1 2 +#define BRW_BINDING_TABLE_EDIT_TARGET_CORE0 1 +/* In HSW, when editing binding table entries to surface state offsets, + * the surface state offset is a 16-bit value aligned to 32 bytes. But + * Surface State Pointer in dword 2 is [15:0]. Right shift surf_offset + * by 5 bits so it won't disturb bit 16 (which is used as the binding + * table index entry), otherwise it would hang the GPU. 
+ */ +#define HSW_SURFACE_STATE_EDIT(value) (value >> 5) +/* Same as Haswell, but surface state offsets now aligned to 64 bytes.*/ +#define GEN8_SURFACE_STATE_EDIT(value) (value >> 6) + #define _3DSTATE_SAMPLER_STATE_POINTERS 0x7802 /* GEN6+ */ # define PS_SAMPLER_STATE_CHANGE (1 << 12) # define GS_SAMPLER_STATE_CHANGE (1 << 9) @@ -1409,6 +1745,21 @@ enum brw_message_target { #define _3DSTATE_VF 0x780c /* GEN7.5+ */ #define HSW_CUT_INDEX_ENABLE (1 << 8) +#define _3DSTATE_VF_INSTANCING 0x7849 /* GEN8+ */ +# define GEN8_VF_INSTANCING_ENABLE (1 << 8) + +#define _3DSTATE_VF_SGVS 0x784a /* GEN8+ */ +# define GEN8_SGVS_ENABLE_INSTANCE_ID (1 << 31) +# define GEN8_SGVS_INSTANCE_ID_COMPONENT_SHIFT 29 +# define GEN8_SGVS_INSTANCE_ID_ELEMENT_OFFSET_SHIFT 16 +# define GEN8_SGVS_ENABLE_VERTEX_ID (1 << 15) +# define GEN8_SGVS_VERTEX_ID_COMPONENT_SHIFT 13 +# define GEN8_SGVS_VERTEX_ID_ELEMENT_OFFSET_SHIFT 0 + +#define _3DSTATE_VF_TOPOLOGY 0x784b /* GEN8+ */ + +#define _3DSTATE_WM_CHROMAKEY 0x784c /* GEN8+ */ + #define _3DSTATE_URB_VS 0x7830 /* GEN7+ */ #define _3DSTATE_URB_HS 0x7831 /* GEN7+ */ #define _3DSTATE_URB_DS 0x7832 /* GEN7+ */ @@ -1416,10 +1767,14 @@ enum brw_message_target { # define GEN7_URB_ENTRY_SIZE_SHIFT 16 # define GEN7_URB_STARTING_ADDRESS_SHIFT 25 -/* "GS URB Entry Allocation Size" is a U9-1 field, so the maximum gs_size +/* Gen7 "GS URB Entry Allocation Size" is a U9-1 field, so the maximum gs_size * is 2^9, or 512. It's counted in multiples of 64 bytes. */ -#define GEN7_MAX_GS_URB_ENTRY_SIZE_BYTES (512*64) +#define GEN7_MAX_GS_URB_ENTRY_SIZE_BYTES (512*64) +/* Gen6 "GS URB Entry Allocation Size" is defined as a number of 1024-bit + * (128 bytes) URB rows and the maximum allowed value is 5 rows. 
+ */ +#define GEN6_MAX_GS_URB_ENTRY_SIZE_BYTES (5*128) #define _3DSTATE_PUSH_CONSTANT_ALLOC_VS 0x7912 /* GEN7+ */ #define _3DSTATE_PUSH_CONSTANT_ALLOC_GS 0x7915 /* GEN7+ */ @@ -1430,7 +1785,7 @@ enum brw_message_target { # define GEN6_CC_VIEWPORT_MODIFY (1 << 12) # define GEN6_SF_VIEWPORT_MODIFY (1 << 11) # define GEN6_CLIP_VIEWPORT_MODIFY (1 << 10) -# define GEN7_NUM_VIEWPORTS 16 +# define GEN6_NUM_VIEWPORTS 16 #define _3DSTATE_VIEWPORT_STATE_POINTERS_CC 0x7823 /* GEN7+ */ #define _3DSTATE_VIEWPORT_STATE_POINTERS_SF_CL 0x7821 /* GEN7+ */ @@ -1455,6 +1810,8 @@ enum brw_message_target { # define GEN6_VS_STATISTICS_ENABLE (1 << 10) # define GEN6_VS_CACHE_DISABLE (1 << 1) # define GEN6_VS_ENABLE (1 << 0) +/* Gen8+ DW7 */ +# define GEN8_VS_SIMD8_ENABLE (1 << 2) /* Gen8+ DW8 */ # define GEN8_VS_URB_ENTRY_OUTPUT_OFFSET_SHIFT 21 # define GEN8_VS_URB_OUTPUT_LENGTH_SHIFT 16 @@ -1482,9 +1839,9 @@ enum brw_message_target { # define GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_CUT 0 # define GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID 1 # define GEN7_GS_CONTROL_DATA_HEADER_SIZE_SHIFT 20 -# define GEN7_GS_DISPATCH_MODE_SINGLE (0 << 11) -# define GEN7_GS_DISPATCH_MODE_DUAL_INSTANCE (1 << 11) -# define GEN7_GS_DISPATCH_MODE_DUAL_OBJECT (2 << 11) +# define GEN7_GS_INSTANCE_CONTROL_SHIFT 15 +# define GEN7_GS_DISPATCH_MODE_SHIFT 11 +# define GEN7_GS_DISPATCH_MODE_MASK INTEL_MASK(12, 11) # define GEN6_GS_STATISTICS_ENABLE (1 << 10) # define GEN6_GS_SO_STATISTICS_ENABLE (1 << 9) # define GEN6_GS_RENDERING_ENABLE (1 << 8) @@ -1501,9 +1858,19 @@ enum brw_message_target { # define GEN6_GS_SVBI_POSTINCREMENT_VALUE_MASK INTEL_MASK(25, 16) # define GEN6_GS_ENABLE (1 << 15) +/* Gen8+ DW9 */ +# define GEN8_GS_URB_ENTRY_OUTPUT_OFFSET_SHIFT 21 +# define GEN8_GS_URB_OUTPUT_LENGTH_SHIFT 16 +# define GEN8_GS_USER_CLIP_DISTANCE_SHIFT 8 + # define BRW_GS_EDGE_INDICATOR_0 (1 << 8) # define BRW_GS_EDGE_INDICATOR_1 (1 << 9) +/* GS Thread Payload + */ +/* R0 */ +# define GEN7_GS_PAYLOAD_INSTANCE_ID_SHIFT 27 + /* 
3DSTATE_GS "Output Vertex Size" has an effective maximum of 62. It's * counted in multiples of 16 bytes. */ @@ -1559,6 +1926,7 @@ enum brw_message_target { # define GEN6_SF_SWIZZLE_ENABLE (1 << 21) # define GEN6_SF_POINT_SPRITE_UPPERLEFT (0 << 20) # define GEN6_SF_POINT_SPRITE_LOWERLEFT (1 << 20) +# define GEN9_SF_LINE_WIDTH_SHIFT 12 /* U11.7 */ # define GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT 11 # define GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT 4 /* DW2 */ @@ -1642,7 +2010,7 @@ enum brw_message_target { /* GEN7/DW1: */ # define GEN7_SF_DEPTH_BUFFER_SURFACE_FORMAT_SHIFT 12 /* GEN7/DW2: */ -# define HSW_SF_LINE_STIPPLE_ENABLE 14 +# define HSW_SF_LINE_STIPPLE_ENABLE (1 << 14) # define GEN8_SF_SMOOTH_POINT_ENABLE (1 << 13) @@ -1663,19 +2031,96 @@ enum brw_message_target { /* DW12: attr 0-7 wrap shortest enables */ /* DW13: attr 8-16 wrap shortest enables */ +/* DW4-5: Attribute active components (gen9) */ +#define GEN9_SBE_ACTIVE_COMPONENT_NONE 0 +#define GEN9_SBE_ACTIVE_COMPONENT_XY 1 +#define GEN9_SBE_ACTIVE_COMPONENT_XYZ 2 +#define GEN9_SBE_ACTIVE_COMPONENT_XYZW 3 + #define _3DSTATE_SBE_SWIZ 0x7851 /* GEN8+ */ #define _3DSTATE_RASTER 0x7850 /* GEN8+ */ /* DW1 */ +# define GEN9_RASTER_VIEWPORT_Z_FAR_CLIP_TEST_ENABLE (1 << 26) # define GEN8_RASTER_FRONT_WINDING_CCW (1 << 21) # define GEN8_RASTER_CULL_BOTH (0 << 16) # define GEN8_RASTER_CULL_NONE (1 << 16) # define GEN8_RASTER_CULL_FRONT (2 << 16) # define GEN8_RASTER_CULL_BACK (3 << 16) # define GEN8_RASTER_SMOOTH_POINT_ENABLE (1 << 13) +# define GEN8_RASTER_API_MULTISAMPLE_ENABLE (1 << 12) # define GEN8_RASTER_LINE_AA_ENABLE (1 << 2) # define GEN8_RASTER_SCISSOR_ENABLE (1 << 1) # define GEN8_RASTER_VIEWPORT_Z_CLIP_TEST_ENABLE (1 << 0) +# define GEN9_RASTER_VIEWPORT_Z_NEAR_CLIP_TEST_ENABLE (1 << 0) + +/* Gen8 BLEND_STATE */ +/* DW0 */ +#define GEN8_BLEND_ALPHA_TO_COVERAGE_ENABLE (1 << 31) +#define GEN8_BLEND_INDEPENDENT_ALPHA_BLEND_ENABLE (1 << 30) +#define GEN8_BLEND_ALPHA_TO_ONE_ENABLE (1 << 29) +#define 
GEN8_BLEND_ALPHA_TO_COVERAGE_DITHER_ENABLE (1 << 28) +#define GEN8_BLEND_ALPHA_TEST_ENABLE (1 << 27) +#define GEN8_BLEND_ALPHA_TEST_FUNCTION_MASK INTEL_MASK(26, 24) +#define GEN8_BLEND_ALPHA_TEST_FUNCTION_SHIFT 24 +#define GEN8_BLEND_COLOR_DITHER_ENABLE (1 << 23) +#define GEN8_BLEND_X_DITHER_OFFSET_MASK INTEL_MASK(22, 21) +#define GEN8_BLEND_X_DITHER_OFFSET_SHIFT 21 +#define GEN8_BLEND_Y_DITHER_OFFSET_MASK INTEL_MASK(20, 19) +#define GEN8_BLEND_Y_DITHER_OFFSET_SHIFT 19 +/* DW1 + 2n */ +#define GEN8_BLEND_COLOR_BUFFER_BLEND_ENABLE (1 << 31) +#define GEN8_BLEND_SRC_BLEND_FACTOR_MASK INTEL_MASK(30, 26) +#define GEN8_BLEND_SRC_BLEND_FACTOR_SHIFT 26 +#define GEN8_BLEND_DST_BLEND_FACTOR_MASK INTEL_MASK(25, 21) +#define GEN8_BLEND_DST_BLEND_FACTOR_SHIFT 21 +#define GEN8_BLEND_COLOR_BLEND_FUNCTION_MASK INTEL_MASK(20, 18) +#define GEN8_BLEND_COLOR_BLEND_FUNCTION_SHIFT 18 +#define GEN8_BLEND_SRC_ALPHA_BLEND_FACTOR_MASK INTEL_MASK(17, 13) +#define GEN8_BLEND_SRC_ALPHA_BLEND_FACTOR_SHIFT 13 +#define GEN8_BLEND_DST_ALPHA_BLEND_FACTOR_MASK INTEL_MASK(12, 8) +#define GEN8_BLEND_DST_ALPHA_BLEND_FACTOR_SHIFT 8 +#define GEN8_BLEND_ALPHA_BLEND_FUNCTION_MASK INTEL_MASK(7, 5) +#define GEN8_BLEND_ALPHA_BLEND_FUNCTION_SHIFT 5 +#define GEN8_BLEND_WRITE_DISABLE_ALPHA (1 << 3) +#define GEN8_BLEND_WRITE_DISABLE_RED (1 << 2) +#define GEN8_BLEND_WRITE_DISABLE_GREEN (1 << 1) +#define GEN8_BLEND_WRITE_DISABLE_BLUE (1 << 0) +/* DW1 + 2n + 1 */ +#define GEN8_BLEND_LOGIC_OP_ENABLE (1 << 31) +#define GEN8_BLEND_LOGIC_OP_FUNCTION_MASK INTEL_MASK(30, 27) +#define GEN8_BLEND_LOGIC_OP_FUNCTION_SHIFT 27 +#define GEN8_BLEND_PRE_BLEND_SRC_ONLY_CLAMP_ENABLE (1 << 4) +#define GEN8_BLEND_COLOR_CLAMP_RANGE_RTFORMAT (2 << 2) +#define GEN8_BLEND_PRE_BLEND_COLOR_CLAMP_ENABLE (1 << 1) +#define GEN8_BLEND_POST_BLEND_COLOR_CLAMP_ENABLE (1 << 0) + +#define _3DSTATE_WM_HZ_OP 0x7852 /* GEN8+ */ +/* DW1 */ +# define GEN8_WM_HZ_STENCIL_CLEAR (1 << 31) +# define GEN8_WM_HZ_DEPTH_CLEAR (1 << 30) +# define 
GEN8_WM_HZ_DEPTH_RESOLVE (1 << 28) +# define GEN8_WM_HZ_HIZ_RESOLVE (1 << 27) +# define GEN8_WM_HZ_PIXEL_OFFSET_ENABLE (1 << 26) +# define GEN8_WM_HZ_FULL_SURFACE_DEPTH_CLEAR (1 << 25) +# define GEN8_WM_HZ_STENCIL_CLEAR_VALUE_MASK INTEL_MASK(23, 16) +# define GEN8_WM_HZ_STENCIL_CLEAR_VALUE_SHIFT 16 +# define GEN8_WM_HZ_NUM_SAMPLES_MASK INTEL_MASK(15, 13) +# define GEN8_WM_HZ_NUM_SAMPLES_SHIFT 13 +/* DW2 */ +# define GEN8_WM_HZ_CLEAR_RECTANGLE_Y_MIN_MASK INTEL_MASK(31, 16) +# define GEN8_WM_HZ_CLEAR_RECTANGLE_Y_MIN_SHIFT 16 +# define GEN8_WM_HZ_CLEAR_RECTANGLE_X_MIN_MASK INTEL_MASK(15, 0) +# define GEN8_WM_HZ_CLEAR_RECTANGLE_X_MIN_SHIFT 0 +/* DW3 */ +# define GEN8_WM_HZ_CLEAR_RECTANGLE_Y_MAX_MASK INTEL_MASK(31, 16) +# define GEN8_WM_HZ_CLEAR_RECTANGLE_Y_MAX_SHIFT 16 +# define GEN8_WM_HZ_CLEAR_RECTANGLE_X_MAX_MASK INTEL_MASK(15, 0) +# define GEN8_WM_HZ_CLEAR_RECTANGLE_X_MAX_SHIFT 0 +/* DW4 */ +# define GEN8_WM_HZ_SAMPLE_MASK_MASK INTEL_MASK(15, 0) +# define GEN8_WM_HZ_SAMPLE_MASK_SHIFT 0 + #define _3DSTATE_PS_BLEND 0x784D /* GEN8+ */ /* DW1 */ @@ -1718,6 +2163,36 @@ enum brw_message_target { # define GEN8_WM_DS_BF_STENCIL_TEST_MASK_SHIFT 8 # define GEN8_WM_DS_BF_STENCIL_WRITE_MASK_MASK INTEL_MASK(7, 0) # define GEN8_WM_DS_BF_STENCIL_WRITE_MASK_SHIFT 0 +/* DW3 */ +# define GEN9_WM_DS_STENCIL_REF_MASK INTEL_MASK(15, 8) +# define GEN9_WM_DS_STENCIL_REF_SHIFT 8 +# define GEN9_WM_DS_BF_STENCIL_REF_MASK INTEL_MASK(7, 0) +# define GEN9_WM_DS_BF_STENCIL_REF_SHIFT 0 + +enum brw_pixel_shader_computed_depth_mode { + BRW_PSCDEPTH_OFF = 0, /* PS does not compute depth */ + BRW_PSCDEPTH_ON = 1, /* PS computes depth; no guarantee about value */ + BRW_PSCDEPTH_ON_GE = 2, /* PS guarantees output depth >= source depth */ + BRW_PSCDEPTH_ON_LE = 3, /* PS guarantees output depth <= source depth */ +}; + +#define _3DSTATE_PS_EXTRA 0x784F /* GEN8+ */ +/* DW1 */ +# define GEN8_PSX_PIXEL_SHADER_VALID (1 << 31) +# define GEN8_PSX_PIXEL_SHADER_NO_RT_WRITE (1 << 30) +# define 
GEN8_PSX_OMASK_TO_RENDER_TARGET (1 << 29) +# define GEN8_PSX_KILL_ENABLE (1 << 28) +# define GEN8_PSX_COMPUTED_DEPTH_MODE_SHIFT 26 +# define GEN8_PSX_FORCE_COMPUTED_DEPTH (1 << 25) +# define GEN8_PSX_USES_SOURCE_DEPTH (1 << 24) +# define GEN8_PSX_USES_SOURCE_W (1 << 23) +# define GEN8_PSX_ATTRIBUTE_ENABLE (1 << 8) +# define GEN8_PSX_SHADER_DISABLES_ALPHA_TO_COVERAGE (1 << 7) +# define GEN8_PSX_SHADER_IS_PER_SAMPLE (1 << 6) +# define GEN8_PSX_SHADER_COMPUTES_STENCIL (1 << 5) +# define GEN9_PSX_SHADER_PULLS_BARY (1 << 3) +# define GEN8_PSX_SHADER_HAS_UAV (1 << 2) +# define GEN8_PSX_SHADER_USES_INPUT_COVERAGE_MASK (1 << 1) enum brw_wm_barycentric_interp_mode { BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC = 0, @@ -1850,10 +2325,7 @@ enum brw_wm_barycentric_interp_mode { # define GEN7_WM_DEPTH_RESOLVE (1 << 28) # define GEN7_WM_HIERARCHICAL_DEPTH_RESOLVE (1 << 27) # define GEN7_WM_KILL_ENABLE (1 << 25) -# define GEN7_WM_PSCDEPTH_OFF (0 << 23) -# define GEN7_WM_PSCDEPTH_ON (1 << 23) -# define GEN7_WM_PSCDEPTH_ON_GE (2 << 23) -# define GEN7_WM_PSCDEPTH_ON_LE (3 << 23) +# define GEN7_WM_COMPUTED_DEPTH_MODE_SHIFT 23 # define GEN7_WM_USES_SOURCE_DEPTH (1 << 20) # define GEN7_WM_USES_SOURCE_W (1 << 19) # define GEN7_WM_POSITION_ZW_PIXEL (0 << 17) @@ -1886,6 +2358,7 @@ enum brw_wm_barycentric_interp_mode { # define GEN7_PS_SPF_MODE (1 << 31) # define GEN7_PS_VECTOR_MASK_ENABLE (1 << 30) # define GEN7_PS_SAMPLER_COUNT_SHIFT 27 +# define GEN7_PS_SAMPLER_COUNT_MASK INTEL_MASK(29, 27) # define GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT 18 # define GEN7_PS_FLOATING_POINT_MODE_IEEE_754 (0 << 16) # define GEN7_PS_FLOATING_POINT_MODE_ALT (1 << 16) @@ -1934,12 +2407,17 @@ enum brw_wm_barycentric_interp_mode { /* DW3: SVB maximum index */ #define _3DSTATE_MULTISAMPLE 0x790d /* GEN6+ */ +#define GEN8_3DSTATE_MULTISAMPLE 0x780d /* GEN8+ */ /* DW1 */ # define MS_PIXEL_LOCATION_CENTER (0 << 4) # define MS_PIXEL_LOCATION_UPPER_LEFT (1 << 4) # define MS_NUMSAMPLES_1 (0 << 1) +# define MS_NUMSAMPLES_2 (1 
<< 1) # define MS_NUMSAMPLES_4 (2 << 1) # define MS_NUMSAMPLES_8 (3 << 1) +# define MS_NUMSAMPLES_16 (4 << 1) + +#define _3DSTATE_SAMPLE_PATTERN 0x791c #define _3DSTATE_STENCIL_BUFFER 0x790e /* ILK, SNB */ #define _3DSTATE_HIER_DEPTH_BUFFER 0x790f /* ILK, SNB */ @@ -1987,8 +2465,11 @@ enum brw_wm_barycentric_interp_mode { #define _3DSTATE_SO_BUFFER 0x7918 /* GEN7+ */ /* DW1 */ +# define GEN8_SO_BUFFER_ENABLE (1 << 31) # define SO_BUFFER_INDEX_SHIFT 29 # define SO_BUFFER_INDEX_MASK INTEL_MASK(30, 29) +# define GEN8_SO_BUFFER_OFFSET_WRITE_ENABLE (1 << 21) +# define GEN8_SO_BUFFER_OFFSET_ADDRESS_ENABLE (1 << 20) # define SO_BUFFER_PITCH_SHIFT 0 # define SO_BUFFER_PITCH_MASK INTEL_MASK(11, 0) /* DW2: start address */ @@ -2048,6 +2529,59 @@ enum brw_wm_barycentric_interp_mode { #define HSW_MOCS_WB_LLC_WB_ELLC (2 << 1) #define HSW_MOCS_UC_LLC_WB_ELLC (3 << 1) -#include "intel_chipset.h" +/* Broadwell: these defines always use all available caches (L3, LLC, eLLC), + * and let you force write-back (WB) or write-through (WT) caching, or leave + * it up to the page table entry (PTE) specified by the kernel. + */ +#define BDW_MOCS_WB 0x78 +#define BDW_MOCS_WT 0x58 +#define BDW_MOCS_PTE 0x18 + +/* Skylake: MOCS is now an index into an array of 62 different caching + * configurations programmed by the kernel. 
+ */
+/* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */
+#define SKL_MOCS_WB (2 << 1)
+/* TC=LLC/eLLC, LeCC=PTE, LRUM=3, L3CC=WB */
+#define SKL_MOCS_PTE (1 << 1)
+
+#define MEDIA_VFE_STATE 0x7000
+/* GEN7 DW2, GEN8+ DW3 */
+# define MEDIA_VFE_STATE_MAX_THREADS_SHIFT 16
+# define MEDIA_VFE_STATE_MAX_THREADS_MASK INTEL_MASK(31, 16)
+# define MEDIA_VFE_STATE_URB_ENTRIES_SHIFT 8
+# define MEDIA_VFE_STATE_URB_ENTRIES_MASK INTEL_MASK(15, 8)
+# define MEDIA_VFE_STATE_RESET_GTW_TIMER_SHIFT 7
+# define MEDIA_VFE_STATE_RESET_GTW_TIMER_MASK INTEL_MASK(7, 7)
+# define MEDIA_VFE_STATE_BYPASS_GTW_SHIFT 6
+# define MEDIA_VFE_STATE_BYPASS_GTW_MASK INTEL_MASK(6, 6)
+# define GEN7_MEDIA_VFE_STATE_GPGPU_MODE_SHIFT 2
+# define GEN7_MEDIA_VFE_STATE_GPGPU_MODE_MASK INTEL_MASK(2, 2)
+/* GEN7 DW4, GEN8+ DW5 */
+# define MEDIA_VFE_STATE_URB_ALLOC_SHIFT 16
+# define MEDIA_VFE_STATE_URB_ALLOC_MASK INTEL_MASK(31, 16)
+# define MEDIA_VFE_STATE_CURBE_ALLOC_SHIFT 0
+# define MEDIA_VFE_STATE_CURBE_ALLOC_MASK INTEL_MASK(15, 0)
+
+#define MEDIA_INTERFACE_DESCRIPTOR_LOAD 0x7002
+/* GEN7 DW5, GEN8+ DW6 */
+# define MEDIA_GPGPU_THREAD_COUNT_SHIFT 0
+# define MEDIA_GPGPU_THREAD_COUNT_MASK INTEL_MASK(7, 0)
+# define GEN8_MEDIA_GPGPU_THREAD_COUNT_SHIFT 0
+# define GEN8_MEDIA_GPGPU_THREAD_COUNT_MASK INTEL_MASK(9, 0)
+#define MEDIA_STATE_FLUSH 0x7004
+#define GPGPU_WALKER 0x7105
+/* GEN8+ DW2 */
+# define GPGPU_WALKER_INDIRECT_LENGTH_SHIFT 0
+# define GPGPU_WALKER_INDIRECT_LENGTH_MASK INTEL_MASK(15, 0)
+/* GEN7 DW2, GEN8+ DW4 */
+# define GPGPU_WALKER_SIMD_SIZE_SHIFT 30
+# define GPGPU_WALKER_SIMD_SIZE_MASK INTEL_MASK(31, 30)
+# define GPGPU_WALKER_THREAD_DEPTH_MAX_SHIFT 16
+# define GPGPU_WALKER_THREAD_DEPTH_MAX_MASK INTEL_MASK(21, 16)
+# define GPGPU_WALKER_THREAD_HEIGHT_MAX_SHIFT 8
+# define GPGPU_WALKER_THREAD_HEIGHT_MAX_MASK INTEL_MASK(13, 8) /* 6-bit field; must not overlap DEPTH_MAX (21:16) or SIMD_SIZE (31:30) */
+# define GPGPU_WALKER_THREAD_WIDTH_MAX_SHIFT 0
+# define GPGPU_WALKER_THREAD_WIDTH_MAX_MASK INTEL_MASK(5, 0)
 
 #endif