X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fdrivers%2Fdri%2Fi965%2Fbrw_context.h;h=7a6ec2fda944a65e40048b261c8a8ddc366b04a3;hb=c5b3878714a75dab40439622050b2ce6f60337c0;hp=ae66249d94c4d63263c6dd8ad230052ee7407c6f;hpb=ec542d74578bbef6b55125dd6aba1dc7f5079e65;p=mesa.git diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index ae66249d94c..7a6ec2fda94 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -40,6 +40,8 @@ #include "main/mm.h" #include "main/mtypes.h" #include "brw_structs.h" +#include "intel_aub.h" +#include "program/prog_parameter.h" #ifdef __cplusplus extern "C" { @@ -140,17 +142,28 @@ extern "C" { * Handles blending and (presumably) depth and stencil testing. */ -#define BRW_MAX_CURBE (32*16) - struct brw_context; -struct brw_instruction; +struct brw_inst; struct brw_vs_prog_key; struct brw_vec4_prog_key; struct brw_wm_prog_key; struct brw_wm_prog_data; +enum brw_cache_id { + BRW_CACHE_FS_PROG, + BRW_CACHE_BLORP_BLIT_PROG, + BRW_CACHE_SF_PROG, + BRW_CACHE_VS_PROG, + BRW_CACHE_FF_GS_PROG, + BRW_CACHE_GS_PROG, + BRW_CACHE_CLIP_PROG, + + BRW_MAX_CACHE +}; + enum brw_state_id { - BRW_STATE_URB_FENCE, + /* brw_cache_ids must come first - see brw_state_cache.c */ + BRW_STATE_URB_FENCE = BRW_MAX_CACHE, BRW_STATE_FRAGMENT_PROGRAM, BRW_STATE_GEOMETRY_PROGRAM, BRW_STATE_VERTEX_PROGRAM, @@ -181,45 +194,93 @@ enum brw_state_id { BRW_STATE_META_IN_PROGRESS, BRW_STATE_INTERPOLATION_MAP, BRW_STATE_PUSH_CONSTANT_ALLOCATION, + BRW_STATE_NUM_SAMPLES, + BRW_STATE_TEXTURE_BUFFER, + BRW_STATE_GEN4_UNIT_STATE, + BRW_STATE_CC_VP, + BRW_STATE_SF_VP, + BRW_STATE_CLIP_VP, + BRW_STATE_SAMPLER_STATE_TABLE, + BRW_STATE_VS_ATTRIB_WORKAROUNDS, BRW_NUM_STATE_BITS }; -#define BRW_NEW_URB_FENCE (1 << BRW_STATE_URB_FENCE) -#define BRW_NEW_FRAGMENT_PROGRAM (1 << BRW_STATE_FRAGMENT_PROGRAM) -#define BRW_NEW_GEOMETRY_PROGRAM (1 << BRW_STATE_GEOMETRY_PROGRAM) -#define 
BRW_NEW_VERTEX_PROGRAM (1 << BRW_STATE_VERTEX_PROGRAM) -#define BRW_NEW_CURBE_OFFSETS (1 << BRW_STATE_CURBE_OFFSETS) -#define BRW_NEW_REDUCED_PRIMITIVE (1 << BRW_STATE_REDUCED_PRIMITIVE) -#define BRW_NEW_PRIMITIVE (1 << BRW_STATE_PRIMITIVE) -#define BRW_NEW_CONTEXT (1 << BRW_STATE_CONTEXT) -#define BRW_NEW_PSP (1 << BRW_STATE_PSP) -#define BRW_NEW_SURFACES (1 << BRW_STATE_SURFACES) -#define BRW_NEW_VS_BINDING_TABLE (1 << BRW_STATE_VS_BINDING_TABLE) -#define BRW_NEW_GS_BINDING_TABLE (1 << BRW_STATE_GS_BINDING_TABLE) -#define BRW_NEW_PS_BINDING_TABLE (1 << BRW_STATE_PS_BINDING_TABLE) -#define BRW_NEW_INDICES (1 << BRW_STATE_INDICES) -#define BRW_NEW_VERTICES (1 << BRW_STATE_VERTICES) +/** + * BRW_NEW_*_PROG_DATA and BRW_NEW_*_PROGRAM are similar, but distinct. + * + * BRW_NEW_*_PROGRAM relates to the gl_shader_program/gl_program structures. + * When the currently bound shader program differs from the previous draw + * call, these will be flagged. They cover brw->{stage}_program and + * ctx->{Stage}Program->_Current. + * + * BRW_NEW_*_PROG_DATA is flagged when the effective shaders change, from a + * driver perspective. Even if the same shader is bound at the API level, + * we may need to switch between multiple versions of that shader to handle + * changes in non-orthogonal state. + * + * Additionally, multiple shader programs may have identical vertex shaders + * (for example), or compile down to the same code in the backend. We combine + * those into a single program cache entry. + * + * BRW_NEW_*_PROG_DATA occurs when switching program cache entries, which + * covers the brw_*_prog_data structures, and brw->*.prog_offset. + */ +#define BRW_NEW_FS_PROG_DATA (1ull << BRW_CACHE_FS_PROG) +/* XXX: The BRW_NEW_BLORP_BLIT_PROG_DATA dirty bit is unused (as BLORP doesn't + * use the normal state upload paths), but the cache is still used. To avoid + * polluting the brw_state_cache code with special cases, we retain the dirty + * bit for now. 
It should eventually be removed. + */ +#define BRW_NEW_BLORP_BLIT_PROG_DATA (1ull << BRW_CACHE_BLORP_BLIT_PROG) +#define BRW_NEW_SF_PROG_DATA (1ull << BRW_CACHE_SF_PROG) +#define BRW_NEW_VS_PROG_DATA (1ull << BRW_CACHE_VS_PROG) +#define BRW_NEW_FF_GS_PROG_DATA (1ull << BRW_CACHE_FF_GS_PROG) +#define BRW_NEW_GS_PROG_DATA (1ull << BRW_CACHE_GS_PROG) +#define BRW_NEW_CLIP_PROG_DATA (1ull << BRW_CACHE_CLIP_PROG) +#define BRW_NEW_URB_FENCE (1ull << BRW_STATE_URB_FENCE) +#define BRW_NEW_FRAGMENT_PROGRAM (1ull << BRW_STATE_FRAGMENT_PROGRAM) +#define BRW_NEW_GEOMETRY_PROGRAM (1ull << BRW_STATE_GEOMETRY_PROGRAM) +#define BRW_NEW_VERTEX_PROGRAM (1ull << BRW_STATE_VERTEX_PROGRAM) +#define BRW_NEW_CURBE_OFFSETS (1ull << BRW_STATE_CURBE_OFFSETS) +#define BRW_NEW_REDUCED_PRIMITIVE (1ull << BRW_STATE_REDUCED_PRIMITIVE) +#define BRW_NEW_PRIMITIVE (1ull << BRW_STATE_PRIMITIVE) +#define BRW_NEW_CONTEXT (1ull << BRW_STATE_CONTEXT) +#define BRW_NEW_PSP (1ull << BRW_STATE_PSP) +#define BRW_NEW_SURFACES (1ull << BRW_STATE_SURFACES) +#define BRW_NEW_VS_BINDING_TABLE (1ull << BRW_STATE_VS_BINDING_TABLE) +#define BRW_NEW_GS_BINDING_TABLE (1ull << BRW_STATE_GS_BINDING_TABLE) +#define BRW_NEW_PS_BINDING_TABLE (1ull << BRW_STATE_PS_BINDING_TABLE) +#define BRW_NEW_INDICES (1ull << BRW_STATE_INDICES) +#define BRW_NEW_VERTICES (1ull << BRW_STATE_VERTICES) /** * Used for any batch entry with a relocated pointer that will be used * by any 3D rendering. 
*/ -#define BRW_NEW_BATCH (1 << BRW_STATE_BATCH) +#define BRW_NEW_BATCH (1ull << BRW_STATE_BATCH) /** \see brw.state.depth_region */ -#define BRW_NEW_INDEX_BUFFER (1 << BRW_STATE_INDEX_BUFFER) -#define BRW_NEW_VS_CONSTBUF (1 << BRW_STATE_VS_CONSTBUF) -#define BRW_NEW_GS_CONSTBUF (1 << BRW_STATE_GS_CONSTBUF) -#define BRW_NEW_PROGRAM_CACHE (1 << BRW_STATE_PROGRAM_CACHE) -#define BRW_NEW_STATE_BASE_ADDRESS (1 << BRW_STATE_STATE_BASE_ADDRESS) -#define BRW_NEW_VUE_MAP_VS (1 << BRW_STATE_VUE_MAP_VS) -#define BRW_NEW_VUE_MAP_GEOM_OUT (1 << BRW_STATE_VUE_MAP_GEOM_OUT) -#define BRW_NEW_TRANSFORM_FEEDBACK (1 << BRW_STATE_TRANSFORM_FEEDBACK) -#define BRW_NEW_RASTERIZER_DISCARD (1 << BRW_STATE_RASTERIZER_DISCARD) -#define BRW_NEW_STATS_WM (1 << BRW_STATE_STATS_WM) -#define BRW_NEW_UNIFORM_BUFFER (1 << BRW_STATE_UNIFORM_BUFFER) -#define BRW_NEW_ATOMIC_BUFFER (1 << BRW_STATE_ATOMIC_BUFFER) -#define BRW_NEW_META_IN_PROGRESS (1 << BRW_STATE_META_IN_PROGRESS) -#define BRW_NEW_INTERPOLATION_MAP (1 << BRW_STATE_INTERPOLATION_MAP) -#define BRW_NEW_PUSH_CONSTANT_ALLOCATION (1 << BRW_STATE_PUSH_CONSTANT_ALLOCATION) +#define BRW_NEW_INDEX_BUFFER (1ull << BRW_STATE_INDEX_BUFFER) +#define BRW_NEW_VS_CONSTBUF (1ull << BRW_STATE_VS_CONSTBUF) +#define BRW_NEW_GS_CONSTBUF (1ull << BRW_STATE_GS_CONSTBUF) +#define BRW_NEW_PROGRAM_CACHE (1ull << BRW_STATE_PROGRAM_CACHE) +#define BRW_NEW_STATE_BASE_ADDRESS (1ull << BRW_STATE_STATE_BASE_ADDRESS) +#define BRW_NEW_VUE_MAP_VS (1ull << BRW_STATE_VUE_MAP_VS) +#define BRW_NEW_VUE_MAP_GEOM_OUT (1ull << BRW_STATE_VUE_MAP_GEOM_OUT) +#define BRW_NEW_TRANSFORM_FEEDBACK (1ull << BRW_STATE_TRANSFORM_FEEDBACK) +#define BRW_NEW_RASTERIZER_DISCARD (1ull << BRW_STATE_RASTERIZER_DISCARD) +#define BRW_NEW_STATS_WM (1ull << BRW_STATE_STATS_WM) +#define BRW_NEW_UNIFORM_BUFFER (1ull << BRW_STATE_UNIFORM_BUFFER) +#define BRW_NEW_ATOMIC_BUFFER (1ull << BRW_STATE_ATOMIC_BUFFER) +#define BRW_NEW_META_IN_PROGRESS (1ull << BRW_STATE_META_IN_PROGRESS) +#define 
BRW_NEW_INTERPOLATION_MAP (1ull << BRW_STATE_INTERPOLATION_MAP) +#define BRW_NEW_PUSH_CONSTANT_ALLOCATION (1ull << BRW_STATE_PUSH_CONSTANT_ALLOCATION) +#define BRW_NEW_NUM_SAMPLES (1ull << BRW_STATE_NUM_SAMPLES) +#define BRW_NEW_TEXTURE_BUFFER (1ull << BRW_STATE_TEXTURE_BUFFER) +#define BRW_NEW_GEN4_UNIT_STATE (1ull << BRW_STATE_GEN4_UNIT_STATE) +#define BRW_NEW_CC_VP (1ull << BRW_STATE_CC_VP) +#define BRW_NEW_SF_VP (1ull << BRW_STATE_SF_VP) +#define BRW_NEW_CLIP_VP (1ull << BRW_STATE_CLIP_VP) +#define BRW_NEW_SAMPLER_STATE_TABLE (1ull << BRW_STATE_SAMPLER_STATE_TABLE) +#define BRW_NEW_VS_ATTRIB_WORKAROUNDS (1ull << BRW_STATE_VS_ATTRIB_WORKAROUNDS) struct brw_state_flags { /** State update flags signalled by mesa internals */ @@ -227,76 +288,9 @@ struct brw_state_flags { /** * State update flags signalled as the result of brw_tracked_state updates */ - GLuint brw; - /** State update flags signalled by brw_state_cache.c searches */ - GLuint cache; + uint64_t brw; }; -#define AUB_TRACE_TYPE_MASK 0x0000ff00 -#define AUB_TRACE_TYPE_NOTYPE (0 << 8) -#define AUB_TRACE_TYPE_BATCH (1 << 8) -#define AUB_TRACE_TYPE_VERTEX_BUFFER (5 << 8) -#define AUB_TRACE_TYPE_2D_MAP (6 << 8) -#define AUB_TRACE_TYPE_CUBE_MAP (7 << 8) -#define AUB_TRACE_TYPE_VOLUME_MAP (9 << 8) -#define AUB_TRACE_TYPE_1D_MAP (10 << 8) -#define AUB_TRACE_TYPE_CONSTANT_BUFFER (11 << 8) -#define AUB_TRACE_TYPE_CONSTANT_URB (12 << 8) -#define AUB_TRACE_TYPE_INDEX_BUFFER (13 << 8) -#define AUB_TRACE_TYPE_GENERAL (14 << 8) -#define AUB_TRACE_TYPE_SURFACE (15 << 8) - -/** - * state_struct_type enum values are encoded with the top 16 bits representing - * the type to be delivered to the .aub file, and the bottom 16 bits - * representing the subtype. This macro performs the encoding. 
- */ -#define ENCODE_SS_TYPE(type, subtype) (((type) << 16) | (subtype)) - -enum state_struct_type { - AUB_TRACE_VS_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 1), - AUB_TRACE_GS_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 2), - AUB_TRACE_CLIP_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 3), - AUB_TRACE_SF_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 4), - AUB_TRACE_WM_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 5), - AUB_TRACE_CC_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 6), - AUB_TRACE_CLIP_VP_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 7), - AUB_TRACE_SF_VP_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 8), - AUB_TRACE_CC_VP_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 0x9), - AUB_TRACE_SAMPLER_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 0xa), - AUB_TRACE_KERNEL_INSTRUCTIONS = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 0xb), - AUB_TRACE_SCRATCH_SPACE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 0xc), - AUB_TRACE_SAMPLER_DEFAULT_COLOR = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 0xd), - - AUB_TRACE_SCISSOR_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 0x15), - AUB_TRACE_BLEND_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 0x16), - AUB_TRACE_DEPTH_STENCIL_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 0x17), - - AUB_TRACE_VERTEX_BUFFER = ENCODE_SS_TYPE(AUB_TRACE_TYPE_VERTEX_BUFFER, 0), - AUB_TRACE_BINDING_TABLE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_SURFACE, 0x100), - AUB_TRACE_SURFACE_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_SURFACE, 0x200), - AUB_TRACE_VS_CONSTANTS = ENCODE_SS_TYPE(AUB_TRACE_TYPE_CONSTANT_BUFFER, 0), - AUB_TRACE_WM_CONSTANTS = ENCODE_SS_TYPE(AUB_TRACE_TYPE_CONSTANT_BUFFER, 1), -}; - -/** - * Decode a state_struct_type value to determine the type that should be - * stored in the .aub file. - */ -static inline uint32_t AUB_TRACE_TYPE(enum state_struct_type ss_type) -{ - return (ss_type & 0xFFFF0000) >> 16; -} - -/** - * Decode a state_struct_type value to determine the subtype that should be - * stored in the .aub file. 
- */ -static inline uint32_t AUB_TRACE_SUBTYPE(enum state_struct_type ss_type) -{ - return ss_type & 0xFFFF; -} - /** Subclass of Mesa vertex program */ struct brw_vertex_program { struct gl_vertex_program program; @@ -354,14 +348,25 @@ struct brw_stage_prog_data { GLuint nr_params; /**< number of float params/constants */ GLuint nr_pull_params; + unsigned curb_read_length; + unsigned total_scratch; + + /** + * Register where the thread expects to find input data from the URB + * (typically uniforms, followed by vertex or fragment attributes). + */ + unsigned dispatch_grf_start_reg; + + bool use_alt_mode; /**< Use ALT floating point mode? Otherwise, IEEE. */ + /* Pointers to tracked values (only valid once * _mesa_load_state_parameters has been called at runtime). * * These must be the last fields of the struct (see * brw_stage_prog_data_compare()). */ - const float **param; - const float **pull_param; + const gl_constant_value **param; + const gl_constant_value **pull_param; }; /* Data about a particular attempt to compile a program. Note that @@ -375,14 +380,11 @@ struct brw_stage_prog_data { struct brw_wm_prog_data { struct brw_stage_prog_data base; - GLuint curb_read_length; GLuint num_varying_inputs; - GLuint first_curbe_grf; - GLuint first_curbe_grf_16; + GLuint dispatch_grf_start_reg_16; GLuint reg_blocks; GLuint reg_blocks_16; - GLuint total_scratch; struct { /** @{ @@ -392,9 +394,13 @@ struct brw_wm_prog_data { /** @} */ } binding_table; + uint8_t computed_depth_mode; + + bool no_8; bool dual_src_blend; bool uses_pos_offset; bool uses_omask; + bool uses_kill; uint32_t prog_offset_16; /** @@ -581,22 +587,16 @@ struct brw_vec4_prog_data { struct brw_stage_prog_data base; struct brw_vue_map vue_map; - /** - * Register where the thread expects to find input data from the URB - * (typically uniforms, followed by per-vertex inputs). 
- */ - unsigned dispatch_grf_start_reg; - - GLuint curb_read_length; GLuint urb_read_length; GLuint total_grf; - GLuint total_scratch; /* Used for calculating urb partitions. In the VS, this is the size of the * URB entry used for both input and output to the thread. In the GS, this * is the size of the URB entry used for output. */ GLuint urb_entry_size; + + bool simd8; }; @@ -609,46 +609,7 @@ struct brw_vs_prog_data { GLbitfield64 inputs_read; bool uses_vertexid; -}; - - -/* Note: brw_gs_prog_data_compare() must be updated when adding fields to - * this struct! - */ -struct brw_gs_prog_data -{ - struct brw_vec4_prog_data base; - - /** - * Size of an output vertex, measured in HWORDS (32 bytes). - */ - unsigned output_vertex_size_hwords; - - unsigned output_topology; - - /** - * Size of the control data (cut bits or StreamID bits), in hwords (32 - * bytes). 0 if there is no control data. - */ - unsigned control_data_header_size_hwords; - - /** - * Format of the control data (either GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID - * if the control data is StreamID bits, or - * GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_CUT if the control data is cut bits). - * Ignored if control_data_header_size is 0. - */ - unsigned control_data_format; - - bool include_primitive_id; - - int invocations; - - /** - * True if the thread should be dispatched in DUAL_INSTANCE mode, false if - * it should be dispatched in DUAL_OBJECT mode. - */ - bool dual_instanced_dispatch; + bool uses_instanceid; }; /** Number of texture sampler units */ @@ -695,7 +656,77 @@ struct brw_gs_prog_data 2 /* shader time, pull constants */) #define SURF_INDEX_GEN6_SOL_BINDING(t) (t) -#define BRW_MAX_GEN6_GS_SURFACES SURF_INDEX_GEN6_SOL_BINDING(BRW_MAX_SOL_BINDINGS) + +/* Note: brw_gs_prog_data_compare() must be updated when adding fields to + * this struct! + */ +struct brw_gs_prog_data +{ + struct brw_vec4_prog_data base; + + /** + * Size of an output vertex, measured in HWORDS (32 bytes). 
+ */ + unsigned output_vertex_size_hwords; + + unsigned output_topology; + + /** + * Size of the control data (cut bits or StreamID bits), in hwords (32 + * bytes). 0 if there is no control data. + */ + unsigned control_data_header_size_hwords; + + /** + * Format of the control data (either GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID + * if the control data is StreamID bits, or + * GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_CUT if the control data is cut bits). + * Ignored if control_data_header_size is 0. + */ + unsigned control_data_format; + + bool include_primitive_id; + + int invocations; + + /** + * Dispatch mode, can be any of: + * GEN7_GS_DISPATCH_MODE_DUAL_OBJECT + * GEN7_GS_DISPATCH_MODE_DUAL_INSTANCE + * GEN7_GS_DISPATCH_MODE_SINGLE + */ + int dispatch_mode; + + /** + * Gen6 transform feedback enabled flag. + */ + bool gen6_xfb_enabled; + + /** + * Gen6: Provoking vertex convention for odd-numbered triangles + * in tristrips. + */ + GLuint pv_first:1; + + /** + * Gen6: Number of varyings that are output to transform feedback. + */ + GLuint num_transform_feedback_bindings:7; /* 0-BRW_MAX_SOL_BINDINGS */ + + /** + * Gen6: Map from the index of a transform feedback binding table entry to the + * gl_varying_slot that should be streamed out through that binding table + * entry. + */ + unsigned char transform_feedback_bindings[BRW_MAX_SOL_BINDINGS]; + + /** + * Gen6: Map from the index of a transform feedback binding table entry to the + * swizzles that should be used when streaming out data through that + * binding table entry. + */ + unsigned char transform_feedback_swizzles[BRW_MAX_SOL_BINDINGS]; +}; /** * Stride in bytes between shader_time entries. 
@@ -705,34 +736,10 @@ struct brw_gs_prog_data */ #define SHADER_TIME_STRIDE 64 -enum brw_cache_id { - BRW_CC_VP, - BRW_CC_UNIT, - BRW_WM_PROG, - BRW_BLORP_BLIT_PROG, - BRW_BLORP_CONST_COLOR_PROG, - BRW_SAMPLER, - BRW_WM_UNIT, - BRW_SF_PROG, - BRW_SF_VP, - BRW_SF_UNIT, /* scissor state on gen6 */ - BRW_VS_UNIT, - BRW_VS_PROG, - BRW_FF_GS_UNIT, - BRW_FF_GS_PROG, - BRW_GS_PROG, - BRW_CLIP_VP, - BRW_CLIP_UNIT, - BRW_CLIP_PROG, - - BRW_MAX_CACHE -}; - struct brw_cache_item { /** * Effectively part of the key, cache_id identifies what kind of state - * buffer is involved, and also which brw->state.dirty.cache flag should - * be set when this cache item is chosen. + * buffer is involved, and also which dirty flag should be set. */ enum brw_cache_id cache_id; /** 32-bit hash of the key data */ @@ -799,33 +806,6 @@ enum shader_time_shader_type { ST_FS16_RESET, }; -/* Flags for brw->state.cache. - */ -#define CACHE_NEW_CC_VP (1<DrawBuffer, updated by BRW_NEW_NUM_SAMPLES so + * that we don't have to reemit that state every time we change FBOs. + */ + int num_samples; /** * Platform specific constants containing the maximum number of threads @@ -1195,6 +1190,7 @@ struct brw_context */ struct { GLuint vsize; /* vertex size plus header in urb registers */ + GLuint gsize; /* GS output size in urb registers */ GLuint csize; /* constant buffer size in urb registers */ GLuint sfsize; /* setup data size in urb registers */ @@ -1217,10 +1213,10 @@ struct brw_context GLuint cs_start; GLuint size; /* Hardware URB size, in KB. */ - /* gen6: True if the most recently sent _3DSTATE_URB message allocated + /* True if the most recently sent _3DSTATE_URB message allocated * URB space for the GS. */ - bool gen6_gs_previously_active; + bool gs_present; } urb; @@ -1235,25 +1231,13 @@ struct brw_context GLuint vs_size; GLuint total_size; + /** + * Pointer to the (intel_upload.c-generated) BO containing the uniforms + * for upload to the CURBE. 
+ */ drm_intel_bo *curbe_bo; /** Offset within curbe_bo of space for current curbe entry */ GLuint curbe_offset; - /** Offset within curbe_bo of space for next curbe entry */ - GLuint curbe_next_offset; - - /** - * Copy of the last set of CURBEs uploaded. Frequently we'll end up - * in brw_curbe.c with the same set of constant data to be uploaded, - * so we'd rather not upload new constants in that case (it can cause - * a pipeline bubble since only up to 4 can be pipelined at a time). - */ - GLfloat *last_buf; - /** - * Allocation for where to calculate the next set of CURBEs. - * It's a hot enough path that malloc/free of that data matters. - */ - GLfloat *next_buf; - GLuint last_bufsz; } curbe; /** @@ -1272,26 +1256,6 @@ struct brw_context */ struct brw_vue_map vue_map_geom_out; - /** - * Data structures used by all vec4 program compiles (not specific to any - * particular program). - */ - struct { - struct ra_regs *regs; - - /** - * Array of the ra classes for the unaligned contiguous register - * block sizes used. - */ - int *classes; - - /** - * Mapping for register-allocated objects in *regs to the first - * GRF for that object. - */ - uint8_t *ra_reg_to_grf; - } vec4; - struct { struct brw_stage_state base; struct brw_vs_prog_data *prog_data; @@ -1317,7 +1281,12 @@ struct brw_context uint32_t state_offset; uint32_t bind_bo_offset; - uint32_t surf_offset[BRW_MAX_GEN6_GS_SURFACES]; + /** + * Surface offsets for the binding table. We only need surfaces to + * implement transform feedback so BRW_MAX_SOL_BINDINGS is all that we + * need in this case. + */ + uint32_t surf_offset[BRW_MAX_SOL_BINDINGS]; } ff_gs; struct { @@ -1343,6 +1312,7 @@ struct brw_context uint32_t prog_offset; uint32_t state_offset; uint32_t vp_offset; + bool viewport_transform_enable; } sf; struct { @@ -1356,28 +1326,7 @@ struct brw_context * Gen6. See brw_update_null_renderbuffer_surface(). 
*/ drm_intel_bo *multisampled_null_render_target_bo; - - struct { - struct ra_regs *regs; - - /** - * Array of the ra classes for the unaligned contiguous register - * block sizes used, indexed by register size. - */ - int classes[16]; - - /** - * Mapping for register-allocated objects in *regs to the first - * GRF for that object. - */ - uint8_t *ra_reg_to_grf; - - /** - * ra class for the aligned pairs we use for PLN, which doesn't - * appear in *classes. - */ - int aligned_pairs_class; - } reg_sets[2]; + uint32_t fast_clear_op; } wm; @@ -1437,7 +1386,7 @@ struct brw_context struct { uint32_t offset; uint32_t size; - enum state_struct_type type; + enum aub_state_struct_type type; } *state_batch_list; int state_batch_count; @@ -1483,16 +1432,12 @@ struct brw_context double report_time; } shader_time; + struct brw_fast_clear_state *fast_clear_state; + __DRIcontext *driContext; struct intel_screen *intelScreen; }; -static inline bool -is_power_of_two(uint32_t value) -{ - return (value & (value - 1)) == 0; -} - /*====================================================================== * brw_vtbl.c */ @@ -1508,8 +1453,6 @@ extern const char *const brw_vendor_string; extern const char *brw_get_renderer_string(unsigned deviceID); -extern void intelFinish(struct gl_context * ctx); - enum { DRI_CONF_BO_REUSE_DISABLED, DRI_CONF_BO_REUSE_ALL @@ -1532,6 +1475,39 @@ GLboolean brwCreateContext(gl_api api, unsigned *error, void *sharedContextPrivate); +/*====================================================================== + * brw_misc_state.c + */ +GLuint brw_get_rb_for_slice(struct brw_context *brw, + struct intel_mipmap_tree *mt, + unsigned level, unsigned layer, bool flat); + +void brw_meta_updownsample(struct brw_context *brw, + struct intel_mipmap_tree *src, + struct intel_mipmap_tree *dst); + +void brw_meta_fbo_stencil_blit(struct brw_context *brw, + GLfloat srcX0, GLfloat srcY0, + GLfloat srcX1, GLfloat srcY1, + GLfloat dstX0, GLfloat dstY0, + GLfloat dstX1, GLfloat 
dstY1); + +void brw_meta_stencil_updownsample(struct brw_context *brw, + struct intel_mipmap_tree *src, + struct intel_mipmap_tree *dst); + +bool brw_meta_fast_clear(struct brw_context *brw, + struct gl_framebuffer *fb, + GLbitfield mask, + bool partial_clear); + +void +brw_meta_resolve_color(struct brw_context *brw, + struct intel_mipmap_tree *mt); +void +brw_meta_fast_clear_free(struct brw_context *brw); + + /*====================================================================== * brw_misc_state.c */ @@ -1607,13 +1583,14 @@ void brw_upload_cs_urb_state(struct brw_context *brw); /* brw_fs_reg_allocate.cpp */ -void brw_fs_alloc_reg_sets(struct brw_context *brw); +void brw_fs_alloc_reg_sets(struct intel_screen *screen); /* brw_vec4_reg_allocate.cpp */ -void brw_vec4_alloc_reg_set(struct brw_context *brw); +void brw_vec4_alloc_reg_set(struct intel_screen *screen); /* brw_disasm.c */ -int brw_disasm (FILE *file, struct brw_instruction *inst, int gen); +int brw_disassemble_inst(FILE *file, struct brw_context *brw, + struct brw_inst *inst, bool is_compacted); /* brw_vs.c */ gl_clip_plane *brw_select_clip_planes(struct gl_context *ctx); @@ -1643,7 +1620,8 @@ brw_update_sol_surface(struct brw_context *brw, void brw_upload_ubo_surfaces(struct brw_context *brw, struct gl_shader *shader, struct brw_stage_state *stage_state, - struct brw_stage_prog_data *prog_data); + struct brw_stage_prog_data *prog_data, + bool dword_pitch); void brw_upload_abo_surfaces(struct brw_context *brw, struct gl_shader_program *prog, struct brw_stage_state *stage_state, @@ -1740,6 +1718,8 @@ gen6_get_sample_position(struct gl_context *ctx, struct gl_framebuffer *fb, GLuint index, GLfloat *result); +void +gen6_set_sample_maps(struct gl_context *ctx); /* gen8_multisample_state.c */ void gen8_emit_3dstate_multisample(struct brw_context *brw, unsigned num_samp); @@ -1843,7 +1823,6 @@ struct opcode_desc { extern const struct opcode_desc opcode_descs[128]; extern const char * const 
conditional_modifier[16]; -extern const char * const reg_encoding[8]; void brw_emit_depthbuffer(struct brw_context *brw); @@ -1858,6 +1837,16 @@ brw_emit_depth_stencil_hiz(struct brw_context *brw, uint32_t width, uint32_t height, uint32_t tile_x, uint32_t tile_y); +void +gen6_emit_depth_stencil_hiz(struct brw_context *brw, + struct intel_mipmap_tree *depth_mt, + uint32_t depth_offset, uint32_t depthbuffer_format, + uint32_t depth_surface_type, + struct intel_mipmap_tree *stencil_mt, + bool hiz, bool separate_stencil, + uint32_t width, uint32_t height, + uint32_t tile_x, uint32_t tile_y); + void gen7_emit_depth_stencil_hiz(struct brw_context *brw, struct intel_mipmap_tree *depth_mt, @@ -1880,7 +1869,7 @@ gen8_emit_depth_stencil_hiz(struct brw_context *brw, void gen8_hiz_exec(struct brw_context *brw, struct intel_mipmap_tree *mt, unsigned int level, unsigned int layer, enum gen6_hiz_op op); -extern const GLuint prim_to_hw_prim[GL_TRIANGLE_STRIP_ADJACENCY+1]; +uint32_t get_hw_prim_for_gl_prim(int mode); void brw_setup_vec4_key_clip_info(struct brw_context *brw, @@ -1888,11 +1877,11 @@ brw_setup_vec4_key_clip_info(struct brw_context *brw, bool program_uses_clip_distance); void -gen6_upload_vec4_push_constants(struct brw_context *brw, - const struct gl_program *prog, - const struct brw_vec4_prog_data *prog_data, - struct brw_stage_state *stage_state, - enum state_struct_type type); +gen6_upload_push_constants(struct brw_context *brw, + const struct gl_program *prog, + const struct brw_stage_prog_data *prog_data, + struct brw_stage_state *stage_state, + enum aub_state_struct_type type); /* ================================================================ * From linux kernel i386 header files, copes with odd sizes better