X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fdrivers%2Fdri%2Fi965%2Fbrw_context.h;h=d1ec2ea447ea189df93a3fe610b51b95fcd0f75c;hb=f92fbd554f2e9e702a2bd650c9b2571a3f4f1ab8;hp=854583e4ba0d07bc3be3ccfc598032ea2431f179;hpb=741782b5948bb3d01d699f062a37513c2e73b076;p=mesa.git diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 854583e4ba0..d1ec2ea447e 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -40,6 +40,8 @@ #include "main/mm.h" #include "main/mtypes.h" #include "brw_structs.h" +#include "intel_aub.h" +#include "program/prog_parameter.h" #ifdef __cplusplus extern "C" { @@ -140,10 +142,8 @@ extern "C" { * Handles blending and (presumably) depth and stencil testing. */ -#define BRW_MAX_CURBE (32*16) - struct brw_context; -struct brw_instruction; +struct brw_inst; struct brw_vs_prog_key; struct brw_vec4_prog_key; struct brw_wm_prog_key; @@ -181,6 +181,7 @@ enum brw_state_id { BRW_STATE_META_IN_PROGRESS, BRW_STATE_INTERPOLATION_MAP, BRW_STATE_PUSH_CONSTANT_ALLOCATION, + BRW_STATE_NUM_SAMPLES, BRW_NUM_STATE_BITS }; @@ -220,6 +221,7 @@ enum brw_state_id { #define BRW_NEW_META_IN_PROGRESS (1 << BRW_STATE_META_IN_PROGRESS) #define BRW_NEW_INTERPOLATION_MAP (1 << BRW_STATE_INTERPOLATION_MAP) #define BRW_NEW_PUSH_CONSTANT_ALLOCATION (1 << BRW_STATE_PUSH_CONSTANT_ALLOCATION) +#define BRW_NEW_NUM_SAMPLES (1 << BRW_STATE_NUM_SAMPLES) struct brw_state_flags { /** State update flags signalled by mesa internals */ @@ -227,75 +229,78 @@ struct brw_state_flags { /** * State update flags signalled as the result of brw_tracked_state updates */ - GLuint brw; - /** State update flags signalled by brw_state_cache.c searches */ + uint64_t brw; + /** + * State update flags that used to be signalled by brw_state_cache.c + * searches. + * + * Now almost all of that state is just streamed out on demand, but the + * flags for those state blobs updating have stayed in the same bitfield. + * brw_state_cache.c still flags CACHE_NEW_*_PROG. + */ GLuint cache; }; -#define AUB_TRACE_TYPE_MASK 0x0000ff00 -#define AUB_TRACE_TYPE_NOTYPE (0 << 8) -#define AUB_TRACE_TYPE_BATCH (1 << 8) -#define AUB_TRACE_TYPE_VERTEX_BUFFER (5 << 8) -#define AUB_TRACE_TYPE_2D_MAP (6 << 8) -#define AUB_TRACE_TYPE_CUBE_MAP (7 << 8) -#define AUB_TRACE_TYPE_VOLUME_MAP (9 << 8) -#define AUB_TRACE_TYPE_1D_MAP (10 << 8) -#define AUB_TRACE_TYPE_CONSTANT_BUFFER (11 << 8) -#define AUB_TRACE_TYPE_CONSTANT_URB (12 << 8) -#define AUB_TRACE_TYPE_INDEX_BUFFER (13 << 8) -#define AUB_TRACE_TYPE_GENERAL (14 << 8) -#define AUB_TRACE_TYPE_SURFACE (15 << 8) /** - * state_struct_type enum values are encoded with the top 16 bits representing - * the type to be delivered to the .aub file, and the bottom 16 bits - * representing the subtype. This macro performs the encoding. + * Enum representing the different pipelines. */ -#define ENCODE_SS_TYPE(type, subtype) (((type) << 16) | (subtype)) - -enum state_struct_type { - AUB_TRACE_VS_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 1), - AUB_TRACE_GS_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 2), - AUB_TRACE_CLIP_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 3), - AUB_TRACE_SF_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 4), - AUB_TRACE_WM_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 5), - AUB_TRACE_CC_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 6), - AUB_TRACE_CLIP_VP_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 7), - AUB_TRACE_SF_VP_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 8), - AUB_TRACE_CC_VP_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 0x9), - AUB_TRACE_SAMPLER_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 0xa), - AUB_TRACE_KERNEL_INSTRUCTIONS = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 0xb), - AUB_TRACE_SCRATCH_SPACE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 0xc), - AUB_TRACE_SAMPLER_DEFAULT_COLOR = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 0xd), - - AUB_TRACE_SCISSOR_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 0x15), - AUB_TRACE_BLEND_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 0x16), - AUB_TRACE_DEPTH_STENCIL_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 0x17), - - AUB_TRACE_VERTEX_BUFFER = ENCODE_SS_TYPE(AUB_TRACE_TYPE_VERTEX_BUFFER, 0), - AUB_TRACE_BINDING_TABLE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_SURFACE, 0x100), - AUB_TRACE_SURFACE_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_SURFACE, 0x200), - AUB_TRACE_VS_CONSTANTS = ENCODE_SS_TYPE(AUB_TRACE_TYPE_CONSTANT_BUFFER, 0), - AUB_TRACE_WM_CONSTANTS = ENCODE_SS_TYPE(AUB_TRACE_TYPE_CONSTANT_BUFFER, 1), -}; +typedef enum { + /** + * 3D rendering pipeline (vertex through fragment shader). + */ + BRW_PIPELINE_3D, + + /** + * Compute shader pipeline. + */ + BRW_PIPELINE_COMPUTE, + + BRW_NUM_PIPELINES +} brw_pipeline; + /** - * Decode a state_struct_type value to determine the type that should be - * stored in the .aub file. + * Set one of the bits in a field of brw_state_flags. */ -static inline uint32_t AUB_TRACE_TYPE(enum state_struct_type ss_type) -{ - return (ss_type & 0xFFFF0000) >> 16; -} +#define SET_DIRTY_BIT(FIELD, FLAG) \ + do { \ + for (int pipeline = 0; pipeline < BRW_NUM_PIPELINES; pipeline++) \ + brw->state.pipeline_dirty[pipeline].FIELD |= (FLAG); \ + } while (false) + /** - * Decode a state_struct_type value to determine the subtype that should be - * stored in the .aub file. + * Set all of the bits in a field of brw_state_flags. */ -static inline uint32_t AUB_TRACE_SUBTYPE(enum state_struct_type ss_type) -{ - return ss_type & 0xFFFF; -} +#define SET_DIRTY_ALL(FIELD) \ + do { \ + /* ~0 == 0xffffffff, so make sure field is <= 32 bits */ \ + STATIC_ASSERT(sizeof(brw->state.pipeline_dirty[0].FIELD) == 4); \ + for (int pipeline = 0; pipeline < BRW_NUM_PIPELINES; pipeline++) \ + brw->state.pipeline_dirty[pipeline].FIELD = ~0; \ + } while (false) + + +/** + * Set all of the bits in a field of brw_state_flags. + */ +#define SET_DIRTY64_ALL(FIELD) \ + do { \ + /* ~0ULL == 0xffffffffffffffff, so make sure field is <= 64 bits */ \ + STATIC_ASSERT(sizeof(brw->state.pipeline_dirty[0].FIELD) == 8); \ + for (int pipeline = 0; pipeline < BRW_NUM_PIPELINES; pipeline++) \ + brw->state.pipeline_dirty[pipeline].FIELD = ~(0ULL); \ + } while (false) + + +/** + * Check one of the bits in a field of brw_state_flags. + */ +#define CHECK_DIRTY_BIT(FIELD, FLAG) \ + ((brw->state.pipeline_dirty[brw->state.current_pipeline].FIELD & (FLAG)) \ + != 0) + /** Subclass of Mesa vertex program */ struct brw_vertex_program { @@ -354,14 +359,23 @@ struct brw_stage_prog_data { GLuint nr_params; /**< number of float params/constants */ GLuint nr_pull_params; + unsigned curb_read_length; + unsigned total_scratch; + + /** + * Register where the thread expects to find input data from the URB + * (typically uniforms, followed by vertex or fragment attributes). + */ + unsigned dispatch_grf_start_reg; + /* Pointers to tracked values (only valid once * _mesa_load_state_parameters has been called at runtime). * * These must be the last fields of the struct (see * brw_stage_prog_data_compare()). */ - const float **param; - const float **pull_param; + const gl_constant_value **param; + const gl_constant_value **pull_param; }; /* Data about a particular attempt to compile a program. Note that @@ -375,14 +389,11 @@ struct brw_stage_prog_data { struct brw_wm_prog_data { struct brw_stage_prog_data base; - GLuint curb_read_length; GLuint num_varying_inputs; - GLuint first_curbe_grf; - GLuint first_curbe_grf_16; + GLuint dispatch_grf_start_reg_16; GLuint reg_blocks; GLuint reg_blocks_16; - GLuint total_scratch; struct { /** @{ @@ -392,6 +403,7 @@ struct brw_wm_prog_data { /** @} */ } binding_table; + bool no_8; bool dual_src_blend; bool uses_pos_offset; bool uses_omask; @@ -581,16 +593,8 @@ struct brw_vec4_prog_data { struct brw_stage_prog_data base; struct brw_vue_map vue_map; - /** - * Register where the thread expects to find input data from the URB - * (typically uniforms, followed by per-vertex inputs). - */ - unsigned dispatch_grf_start_reg; - - GLuint curb_read_length; GLuint urb_read_length; GLuint total_grf; - GLuint total_scratch; /* Used for calculating urb partitions. In the VS, this is the size of the * URB entry used for both input and output to the thread. In the GS, this @@ -820,12 +824,6 @@ enum shader_time_shader_type { #define CACHE_NEW_CLIP_UNIT (1<DrawBuffer, updated by BRW_NEW_NUM_SAMPLES so + * that we don't have to reemit that state every time we change FBOs. + */ + int num_samples; /** * Platform specific constants containing the maximum number of threads @@ -1245,25 +1224,13 @@ struct brw_context GLuint vs_size; GLuint total_size; + /** + * Pointer to the (intel_upload.c-generated) BO containing the uniforms + * for upload to the CURBE. + */ drm_intel_bo *curbe_bo; /** Offset within curbe_bo of space for current curbe entry */ GLuint curbe_offset; - /** Offset within curbe_bo of space for next curbe entry */ - GLuint curbe_next_offset; - - /** - * Copy of the last set of CURBEs uploaded. Frequently we'll end up - * in brw_curbe.c with the same set of constant data to be uploaded, - * so we'd rather not upload new constants in that case (it can cause - * a pipeline bubble since only up to 4 can be pipelined at a time). - */ - GLfloat *last_buf; - /** - * Allocation for where to calculate the next set of CURBEs. - * It's a hot enough path that malloc/free of that data matters. - */ - GLfloat *next_buf; - GLuint last_bufsz; } curbe; /** @@ -1282,26 +1249,6 @@ struct brw_context */ struct brw_vue_map vue_map_geom_out; - /** - * Data structures used by all vec4 program compiles (not specific to any - * particular program). - */ - struct { - struct ra_regs *regs; - - /** - * Array of the ra classes for the unaligned contiguous register - * block sizes used. - */ - int *classes; - - /** - * Mapping for register-allocated objects in *regs to the first - * GRF for that object. - */ - uint8_t *ra_reg_to_grf; - } vec4; - struct { struct brw_stage_state base; struct brw_vs_prog_data *prog_data; @@ -1353,6 +1300,7 @@ struct brw_context uint32_t prog_offset; uint32_t state_offset; uint32_t vp_offset; + bool viewport_transform_enable; } sf; struct { @@ -1366,28 +1314,7 @@ struct brw_context * Gen6. See brw_update_null_renderbuffer_surface(). */ drm_intel_bo *multisampled_null_render_target_bo; - - struct { - struct ra_regs *regs; - - /** - * Array of the ra classes for the unaligned contiguous register - * block sizes used, indexed by register size. - */ - int classes[16]; - - /** - * Mapping for register-allocated objects in *regs to the first - * GRF for that object. - */ - uint8_t *ra_reg_to_grf; - - /** - * ra class for the aligned pairs we use for PLN, which doesn't - * appear in *classes. - */ - int aligned_pairs_class; - } reg_sets[2]; + uint32_t fast_clear_op; } wm; @@ -1440,14 +1367,14 @@ struct brw_context int entries_per_oa_snapshot; } perfmon; - int num_atoms; - const struct brw_tracked_state **atoms; + int num_atoms[BRW_NUM_PIPELINES]; + const struct brw_tracked_state **atoms[BRW_NUM_PIPELINES]; /* If (INTEL_DEBUG & DEBUG_BATCH) */ struct { uint32_t offset; uint32_t size; - enum state_struct_type type; + enum aub_state_struct_type type; } *state_batch_list; int state_batch_count; @@ -1493,16 +1420,12 @@ struct brw_context double report_time; } shader_time; + struct brw_fast_clear_state *fast_clear_state; + __DRIcontext *driContext; struct intel_screen *intelScreen; }; -static inline bool -is_power_of_two(uint32_t value) -{ - return (value & (value - 1)) == 0; -} - /*====================================================================== * brw_vtbl.c */ @@ -1518,8 +1441,6 @@ extern const char *const brw_vendor_string; extern const char *brw_get_renderer_string(unsigned deviceID); -extern void intelFinish(struct gl_context * ctx); - enum { DRI_CONF_BO_REUSE_DISABLED, DRI_CONF_BO_REUSE_ALL @@ -1542,6 +1463,39 @@ GLboolean brwCreateContext(gl_api api, unsigned *error, void *sharedContextPrivate); +/*====================================================================== + * brw_misc_state.c + */ +GLuint brw_get_rb_for_slice(struct brw_context *brw, + struct intel_mipmap_tree *mt, + unsigned level, unsigned layer, bool flat); + +void brw_meta_updownsample(struct brw_context *brw, + struct intel_mipmap_tree *src, + struct intel_mipmap_tree *dst); + +void brw_meta_fbo_stencil_blit(struct brw_context *brw, + GLfloat srcX0, GLfloat srcY0, + GLfloat srcX1, GLfloat srcY1, + GLfloat dstX0, GLfloat dstY0, + GLfloat dstX1, GLfloat dstY1); + +void brw_meta_stencil_updownsample(struct brw_context *brw, + struct intel_mipmap_tree *src, + struct intel_mipmap_tree *dst); + +bool brw_meta_fast_clear(struct brw_context *brw, + struct gl_framebuffer *fb, + GLbitfield mask, + bool partial_clear); + +void +brw_meta_resolve_color(struct brw_context *brw, + struct intel_mipmap_tree *mt); +void +brw_meta_fast_clear_free(struct brw_context *brw); + + /*====================================================================== * brw_misc_state.c */ @@ -1617,13 +1571,14 @@ void brw_upload_cs_urb_state(struct brw_context *brw); /* brw_fs_reg_allocate.cpp */ -void brw_fs_alloc_reg_sets(struct brw_context *brw); +void brw_fs_alloc_reg_sets(struct intel_screen *screen); /* brw_vec4_reg_allocate.cpp */ -void brw_vec4_alloc_reg_set(struct brw_context *brw); +void brw_vec4_alloc_reg_set(struct intel_screen *screen); /* brw_disasm.c */ -int brw_disasm (FILE *file, struct brw_instruction *inst, int gen); +int brw_disassemble_inst(FILE *file, struct brw_context *brw, + struct brw_inst *inst, bool is_compacted); /* brw_vs.c */ gl_clip_plane *brw_select_clip_planes(struct gl_context *ctx); @@ -1843,7 +1798,6 @@ brw_program_reloc(struct brw_context *brw, uint32_t state_offset, bool brw_do_cubemap_normalize(struct exec_list *instructions); bool brw_lower_texture_gradients(struct brw_context *brw, struct exec_list *instructions); -bool brw_do_lower_offset_arrays(struct exec_list *instructions); bool brw_do_lower_unnormalized_offset(struct exec_list *instructions); struct opcode_desc { @@ -1854,7 +1808,6 @@ struct opcode_desc { extern const struct opcode_desc opcode_descs[128]; extern const char * const conditional_modifier[16]; -extern const char * const reg_encoding[8]; void brw_emit_depthbuffer(struct brw_context *brw); @@ -1869,6 +1822,16 @@ brw_emit_depth_stencil_hiz(struct brw_context *brw, uint32_t width, uint32_t height, uint32_t tile_x, uint32_t tile_y); +void +gen6_emit_depth_stencil_hiz(struct brw_context *brw, + struct intel_mipmap_tree *depth_mt, + uint32_t depth_offset, uint32_t depthbuffer_format, + uint32_t depth_surface_type, + struct intel_mipmap_tree *stencil_mt, + bool hiz, bool separate_stencil, + uint32_t width, uint32_t height, + uint32_t tile_x, uint32_t tile_y); + void gen7_emit_depth_stencil_hiz(struct brw_context *brw, struct intel_mipmap_tree *depth_mt, @@ -1891,7 +1854,7 @@ gen8_emit_depth_stencil_hiz(struct brw_context *brw, void gen8_hiz_exec(struct brw_context *brw, struct intel_mipmap_tree *mt, unsigned int level, unsigned int layer, enum gen6_hiz_op op); -extern const GLuint prim_to_hw_prim[GL_TRIANGLE_STRIP_ADJACENCY+1]; +uint32_t get_hw_prim_for_gl_prim(int mode); void brw_setup_vec4_key_clip_info(struct brw_context *brw, @@ -1899,11 +1862,11 @@ brw_setup_vec4_key_clip_info(struct brw_context *brw, bool program_uses_clip_distance); void -gen6_upload_vec4_push_constants(struct brw_context *brw, - const struct gl_program *prog, - const struct brw_vec4_prog_data *prog_data, - struct brw_stage_state *stage_state, - enum state_struct_type type); +gen6_upload_push_constants(struct brw_context *brw, + const struct gl_program *prog, + const struct brw_stage_prog_data *prog_data, + struct brw_stage_state *stage_state, + enum aub_state_struct_type type); /* ================================================================ * From linux kernel i386 header files, copes with odd sizes better