X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fdrivers%2Fdri%2Fi965%2Fbrw_context.h;h=fa2fc72e435e41568cedc999a77bcad010464492;hb=77a18428fffc938a4e3fa9b592e3e104dda0fe7f;hp=471015cf9d0f2c8dca7e4b5aaabea1be23f0e403;hpb=636d01bd61cac83e13c3c64874e7e34e828ca93a;p=mesa.git diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 471015cf9d0..fa2fc72e435 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -142,7 +142,6 @@ enum brw_state_id { BRW_STATE_NR_VS_SURFACES, BRW_STATE_INDEX_BUFFER, BRW_STATE_VS_CONSTBUF, - BRW_STATE_WM_CONSTBUF, BRW_STATE_PROGRAM_CACHE, BRW_STATE_STATE_BASE_ADDRESS, }; @@ -169,11 +168,8 @@ enum brw_state_id { */ #define BRW_NEW_BATCH (1 << BRW_STATE_BATCH) /** \see brw.state.depth_region */ -#define BRW_NEW_NR_WM_SURFACES (1 << BRW_STATE_NR_WM_SURFACES) -#define BRW_NEW_NR_VS_SURFACES (1 << BRW_STATE_NR_VS_SURFACES) #define BRW_NEW_INDEX_BUFFER (1 << BRW_STATE_INDEX_BUFFER) #define BRW_NEW_VS_CONSTBUF (1 << BRW_STATE_VS_CONSTBUF) -#define BRW_NEW_WM_CONSTBUF (1 << BRW_STATE_WM_CONSTBUF) #define BRW_NEW_PROGRAM_CACHE (1 << BRW_STATE_PROGRAM_CACHE) #define BRW_NEW_STATE_BASE_ADDRESS (1 << BRW_STATE_STATE_BASE_ADDRESS) @@ -212,13 +208,14 @@ enum state_struct_type { AUB_TRACE_BINDING_TABLE = 0x101, AUB_TRACE_SURFACE_STATE = 0x102, AUB_TRACE_VS_CONSTANTS = 0x103, + AUB_TRACE_WM_CONSTANTS = 0x104, }; /** Subclass of Mesa vertex program */ struct brw_vertex_program { struct gl_vertex_program program; GLuint id; - GLboolean use_const_buffer; + bool use_const_buffer; }; @@ -226,9 +223,6 @@ struct brw_vertex_program { struct brw_fragment_program { struct gl_fragment_program program; GLuint id; /**< serial no. to identify frag progs, never re-used */ - - /** for debugging, which texture units are referenced */ - GLbitfield tex_units_used; }; struct brw_shader { @@ -247,6 +241,7 @@ enum param_conversion { PARAM_CONVERT_F2I, PARAM_CONVERT_F2U, PARAM_CONVERT_F2B, + PARAM_CONVERT_ZERO, }; /* Data about a particular attempt to compile a program. Note that @@ -266,7 +261,7 @@ struct brw_wm_prog_data { GLuint nr_params; /**< number of float params/constants */ GLuint nr_pull_params; - GLboolean error; + bool error; int dispatch_width; uint32_t prog_offset_16; @@ -279,6 +274,75 @@ struct brw_wm_prog_data { enum param_conversion pull_param_convert[MAX_UNIFORMS * 4]; }; +/** + * Enum representing the i965-specific vertex results that don't correspond + * exactly to any element of gl_vert_result. The values of this enum are + * assigned such that they don't conflict with gl_vert_result. + */ +typedef enum +{ + BRW_VERT_RESULT_NDC = VERT_RESULT_MAX, + BRW_VERT_RESULT_HPOS_DUPLICATE, + BRW_VERT_RESULT_PAD, + BRW_VERT_RESULT_MAX +} brw_vert_result; + + +/** + * Data structure recording the relationship between the gl_vert_result enum + * and "slots" within the vertex URB entry (VUE). A "slot" is defined as a + * single octaword within the VUE (128 bits). + * + * Note that each BRW register contains 256 bits (2 octawords), so when + * accessing the VUE in URB_NOSWIZZLE mode, each register corresponds to two + * consecutive VUE slots. When accessing the VUE in URB_INTERLEAVED mode (as + * in a vertex shader), each register corresponds to a single VUE slot, since + * it contains data for two separate vertices. + */ +struct brw_vue_map { + /** + * Map from gl_vert_result value to VUE slot. 
For gl_vert_results that are + * not stored in a slot (because they are not written, or because + * additional processing is applied before storing them in the VUE), the + * value is -1. + */ + int vert_result_to_slot[BRW_VERT_RESULT_MAX]; + + /** + * Map from VUE slot to gl_vert_result value. For slots that do not + * directly correspond to a gl_vert_result, the value comes from + * brw_vert_result. + * + * For slots that are not in use, the value is BRW_VERT_RESULT_MAX (this + * simplifies code that uses the value stored in slot_to_vert_result to + * create a bit mask). + */ + int slot_to_vert_result[BRW_VERT_RESULT_MAX]; + + /** + * Total number of VUE slots in use + */ + int num_slots; +}; + +/** + * Convert a VUE slot number into a byte offset within the VUE. + */ +static inline GLuint brw_vue_slot_to_offset(GLuint slot) +{ + return 16*slot; +} + +/** + * Convert a vert_result into a byte offset within the VUE. + */ +static inline GLuint brw_vert_result_to_offset(struct brw_vue_map *vue_map, + GLuint vert_result) +{ + return brw_vue_slot_to_offset(vue_map->vert_result_to_slot[vert_result]); +} + + struct brw_sf_prog_data { GLuint urb_read_length; GLuint total_grf; @@ -310,12 +374,20 @@ struct brw_vs_prog_data { GLuint total_grf; GLbitfield64 outputs_written; GLuint nr_params; /**< number of float params/constants */ + GLuint nr_pull_params; /**< number of dwords referenced by pull_param[] */ + GLuint total_scratch; GLuint inputs_read; /* Used for calculating urb partitions: */ GLuint urb_entry_size; + + const float *param[MAX_UNIFORMS * 4]; /* should be: BRW_MAX_CURBE */ + const float *pull_param[MAX_UNIFORMS * 4]; + + bool uses_new_param_layout; + bool uses_vertexid; }; @@ -333,31 +405,48 @@ struct brw_vs_ouput_sizes { #define BRW_MAX_DRAW_BUFFERS 8 /** - * Size of our surface binding table for the WM. - * This contains pointers to the drawing surfaces and current texture - * objects and shader constant buffers (+2). - */ -#define BRW_WM_MAX_SURF (BRW_MAX_DRAW_BUFFERS + BRW_MAX_TEX_UNIT + 1) - -/** - * Helpers to convert drawing buffers, textures and constant buffers - * to surface binding table indexes, for WM. + * Helpers to create Surface Binding Table indexes for draw buffers, + * textures, and constant buffers. + * + * Shader threads access surfaces via numeric handles, rather than directly + * using pointers. The binding table maps these numeric handles to the + * address of the actual buffer. + * + * For example, a shader might ask to sample from "surface 7." In this case, + * bind[7] would contain a pointer to a texture. + * + * Although the hardware supports separate binding tables per pipeline stage + * (VS, HS, DS, GS, PS), we currently share a single binding table for all of + * them. This is purely for convenience. + * + * Currently our binding tables are (arbitrarily) programmed as follows: + * + * +-------------------------------+ + * | 0 | Draw buffer 0 | . + * | . | . | \ + * | : | : | > Only relevant to the WM. + * | 7 | Draw buffer 7 | / + * |-----|-------------------------| ` + * | 8 | VS Pull Constant Buffer | + * | 9 | WM Pull Constant Buffer | + * |-----|-------------------------| + * | 10 | Texture 0 | + * | . | . | + * | : | : | + * | 25 | Texture 15 | + * +-------------------------------+ + * + * Note that nothing actually uses the SURF_INDEX_DRAW macro, so it has to be + * the identity function or things will break. We do want to keep draw buffers + * first so we can use headerless render target writes for RT 0. 
*/ #define SURF_INDEX_DRAW(d) (d) -#define SURF_INDEX_FRAG_CONST_BUFFER (BRW_MAX_DRAW_BUFFERS) -#define SURF_INDEX_TEXTURE(t) (BRW_MAX_DRAW_BUFFERS + 1 + (t)) - -/** - * Size of surface binding table for the VS. - * Only one constant buffer for now. - */ -#define BRW_VS_MAX_SURF 1 - -/** - * Only a VS constant buffer - */ -#define SURF_INDEX_VERT_CONST_BUFFER 0 +#define SURF_INDEX_VERT_CONST_BUFFER (BRW_MAX_DRAW_BUFFERS + 0) +#define SURF_INDEX_FRAG_CONST_BUFFER (BRW_MAX_DRAW_BUFFERS + 1) +#define SURF_INDEX_TEXTURE(t) (BRW_MAX_DRAW_BUFFERS + 2 + (t)) +/** Maximum size of the binding table. */ +#define BRW_MAX_SURFACES (BRW_MAX_DRAW_BUFFERS + BRW_MAX_TEX_UNIT + 2) enum brw_cache_id { BRW_BLEND_STATE, @@ -423,7 +512,6 @@ struct brw_cache { */ struct brw_tracked_state { struct brw_state_flags dirty; - void (*prepare)( struct brw_context *brw ); void (*emit)( struct brw_context *brw ); }; @@ -505,30 +593,18 @@ struct brw_query_object { struct brw_context { struct intel_context intel; /**< base class, must be first field */ - GLuint primitive; + GLuint primitive; /**< Hardware primitive, such as _3DPRIM_TRILIST. */ - GLboolean emit_state_always; - GLboolean has_surface_tile_offset; - GLboolean has_compr4; - GLboolean has_negative_rhw_bug; - GLboolean has_aa_line_parameters; - GLboolean has_pln; + bool emit_state_always; + bool has_surface_tile_offset; + bool has_compr4; + bool has_negative_rhw_bug; + bool has_aa_line_parameters; + bool has_pln; + bool new_vs_backend; struct { struct brw_state_flags dirty; - /** - * List of buffers accumulated in brw_validate_state to receive - * drm_intel_bo_check_aperture treatment before exec, so we can - * know if we should flush the batch and try again before - * emitting primitives. - * - * This can be a fixed number as we only have a limited number of - * objects referenced from the batchbuffer in a primitive emit, - * consisting of the vertex buffers, pipelined state pointers, - * the CURBE, the depth buffer, and a query BO. - */ - drm_intel_bo *validated_bos[VERT_ATTRIB_MAX + BRW_WM_MAX_SURF + 16]; - int validated_bo_count; } state; struct brw_cache cache; @@ -587,8 +663,14 @@ struct brw_context uint32_t CMD_VF_STATISTICS; /* hw-dependent 3DSTATE_PIPELINE_SELECT opcode */ uint32_t CMD_PIPELINE_SELECT; - int vs_max_threads; - int wm_max_threads; + + /** + * Platform specific constants containing the maximum number of threads + * for each pipeline stage. 
+ */ + int max_vs_threads; + int max_gs_threads; + int max_wm_threads; /* BRW_NEW_URB_ALLOCATIONS: */ @@ -597,7 +679,7 @@ struct brw_context GLuint csize; /* constant buffer size in urb registers */ GLuint sfsize; /* setup data size in urb registers */ - GLboolean constrained; + bool constrained; GLuint max_vs_entries; /* Maximum number of VS entries */ GLuint max_gs_entries; /* Maximum number of GS entries */ @@ -658,28 +740,53 @@ struct brw_context GLuint last_bufsz; } curbe; + struct { + /** Binding table of pointers to surf_bo entries */ + uint32_t bo_offset; + uint32_t surf_offset[BRW_MAX_SURFACES]; + } bind; + + /** SAMPLER_STATE count and offset */ + struct { + GLuint count; + uint32_t offset; + } sampler; + struct { struct brw_vs_prog_data *prog_data; int8_t *constant_map; /* variable array following prog_data */ + drm_intel_bo *scratch_bo; drm_intel_bo *const_bo; /** Offset in the program cache to the VS program */ uint32_t prog_offset; uint32_t state_offset; - /** Binding table of pointers to surf_bo entries */ - uint32_t bind_bo_offset; - uint32_t surf_offset[BRW_VS_MAX_SURF]; - GLuint nr_surfaces; - uint32_t push_const_offset; /* Offset in the batchbuffer */ int push_const_size; /* in 256-bit register increments */ + + /** @{ register allocator */ + + struct ra_regs *regs; + + /** + * Array of the ra classes for the unaligned contiguous register + * block sizes used. + */ + int *classes; + + /** + * Mapping for register-allocated objects in *regs to the first + * GRF for that object. + */ + uint8_t *ra_reg_to_grf; + /** @} */ } vs; struct { struct brw_gs_prog_data *prog_data; - GLboolean prog_active; + bool prog_active; /** Offset in the program cache to the CLIP program pre-gen6 */ uint32_t prog_offset; uint32_t state_offset; @@ -724,20 +831,12 @@ struct brw_context uint32_t sdc_offset[BRW_MAX_TEX_UNIT]; GLuint render_surf; - GLuint nr_surfaces; - GLuint max_threads; drm_intel_bo *scratch_bo; - GLuint sampler_count; - uint32_t sampler_offset; - /** Offset in the program cache to the WM program */ uint32_t prog_offset; - /** Binding table of pointers to surf_bo entries */ - uint32_t bind_bo_offset; - uint32_t surf_offset[BRW_WM_MAX_SURF]; uint32_t state_offset; /* offset in batchbuffer to pre-gen6 WM state */ drm_intel_bo *const_bo; /* pull constant buffer. */ @@ -747,6 +846,29 @@ struct brw_context * Pre-gen6, push constants live in the CURBE. */ uint32_t push_const_offset; + + /** @{ register allocator */ + + struct ra_regs *regs; + + /** Array of the ra classes for the unaligned contiguous + * register block sizes used. + */ + int *classes; + + /** + * Mapping for register-allocated objects in *regs to the first + * GRF for that object. + */ + uint8_t *ra_reg_to_grf; + + /** + * ra class for the aligned pairs we use for PLN, which doesn't + * appear in *classes. + */ + int aligned_pairs_class; + + /** @} */ } wm; @@ -761,14 +883,14 @@ struct brw_context struct brw_query_object *obj; drm_intel_bo *bo; int index; - GLboolean active; + bool active; } query; /* Used to give every program string a unique id */ GLuint program_id; - int num_prepare_atoms, num_emit_atoms; - struct brw_tracked_state prepare_atoms[64], emit_atoms[64]; + int num_atoms; + const struct brw_tracked_state **atoms; /* If (INTEL_DEBUG & DEBUG_BATCH) */ struct { @@ -777,16 +899,52 @@ struct brw_context enum state_struct_type type; } *state_batch_list; int state_batch_count; + + /** + * \brief State needed to execute HiZ meta-ops + * + * All fields except \c op are initialized by gen6_hiz_init(). 
+ */ + struct brw_hiz_state { + /** + * \brief Indicates which HiZ operation is in progress. + * + * See the following sections of the Sandy Bridge PRM, Volume 1, Part2: + * - 7.5.3.1 Depth Buffer Clear + * - 7.5.3.2 Depth Buffer Resolve + * - 7.5.3.3 Hierarchical Depth Buffer Resolve + */ + enum brw_hiz_op { + BRW_HIZ_OP_NONE = 0, + BRW_HIZ_OP_DEPTH_CLEAR, + BRW_HIZ_OP_DEPTH_RESOLVE, + BRW_HIZ_OP_HIZ_RESOLVE, + } op; + + /** \brief Shader state */ + struct { + GLuint program; + GLuint position_vbo; + GLint position_location; + } shader; + + /** \brief VAO for the rectangle primitive's vertices. */ + GLuint vao; + + GLuint fbo; + struct gl_renderbuffer *depth_rb; + } hiz; }; + #define BRW_PACKCOLOR8888(r,g,b,a) ((r<<24) | (g<<16) | (b<<8) | a) struct brw_instruction_info { char *name; int nsrc; int ndst; - GLboolean is_arith; + bool is_arith; }; extern const struct brw_instruction_info brw_opcodes[128]; @@ -798,10 +956,10 @@ void brwInitVtbl( struct brw_context *brw ); /*====================================================================== * brw_context.c */ -GLboolean brwCreateContext( int api, - const struct gl_config *mesaVis, - __DRIcontext *driContextPriv, - void *sharedContextPrivate); +bool brwCreateContext(int api, + const struct gl_config *mesaVis, + __DRIcontext *driContextPriv, + void *sharedContextPrivate); /*====================================================================== * brw_queryobj.c @@ -827,6 +985,10 @@ void brw_validate_textures( struct brw_context *brw ); */ void brwInitFragProgFuncs( struct dd_function_table *functions ); +int brw_get_scratch_size(int size); +void brw_get_scratch_bo(struct intel_context *intel, + drm_intel_bo **scratch_bo, int size); + /* brw_urb.c */ @@ -839,6 +1001,24 @@ void brw_upload_cs_urb_state(struct brw_context *brw); /* brw_disasm.c */ int brw_disasm (FILE *file, struct brw_instruction *inst, int gen); +/* brw_vs.c */ +void brw_compute_vue_map(struct brw_vue_map *vue_map, + const struct intel_context *intel, + bool userclip_active, + GLbitfield64 outputs_written); +gl_clip_plane *brw_select_clip_planes(struct gl_context *ctx); + +/* brw_wm.c */ +unsigned +brw_compute_barycentric_interp_modes(bool shade_model_flat, + const struct gl_fragment_program *fprog); + +/* gen6_clip_state.c */ +bool +brw_fprog_uses_noperspective(const struct gl_fragment_program *fprog); + + + /*====================================================================== * Inline conversion functions. 
These are better-typed than the * macros used previously: @@ -874,7 +1054,7 @@ brw_fragment_program_const(const struct gl_fragment_program *p) } static inline -float convert_param(enum param_conversion conversion, float param) +float convert_param(enum param_conversion conversion, const float *param) { union { float f; @@ -884,21 +1064,23 @@ float convert_param(enum param_conversion conversion, float param) switch (conversion) { case PARAM_NO_CONVERT: - return param; + return *param; case PARAM_CONVERT_F2I: - fi.i = param; + fi.i = *param; return fi.f; case PARAM_CONVERT_F2U: - fi.u = param; + fi.u = *param; return fi.f; case PARAM_CONVERT_F2B: - if (param != 0.0) + if (*param != 0.0) fi.i = 1; else fi.i = 0; return fi.f; + case PARAM_CONVERT_ZERO: + return 0.0; default: - return param; + return *param; } } @@ -933,6 +1115,6 @@ brw_program_reloc(struct brw_context *brw, uint32_t state_offset, return brw->cache.bo->offset + prog_offset; } -GLboolean brw_do_cubemap_normalize(struct exec_list *instructions); +bool brw_do_cubemap_normalize(struct exec_list *instructions); #endif
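
A note on the VUE map helpers introduced by this patch: a VUE "slot" is one octaword (128 bits), so brw_vue_slot_to_offset() is simply 16 bytes per slot, and brw_vert_result_to_offset() just indirects through vert_result_to_slot[] first. Below is a minimal standalone sketch of that arithmetic; the struct name, its size, and the slot assignments are invented for illustration (the real map is filled in by brw_compute_vue_map(), declared above but not part of this header).

/*
 * Illustrative sketch only (not part of the patch): the arithmetic behind
 * brw_vue_slot_to_offset() / brw_vert_result_to_offset().  A VUE slot is
 * one octaword (128 bits), so slot N starts at byte offset 16 * N.  The
 * struct, its size, and the slot assignments below are hypothetical.
 */
#include <assert.h>

struct vue_map_sketch {
   int vert_result_to_slot[8];       /* hypothetical, tiny map */
};

static inline unsigned vue_slot_to_offset(unsigned slot)
{
   return 16 * slot;                 /* one 128-bit octaword per slot */
}

static inline unsigned
vert_result_to_offset(const struct vue_map_sketch *map, unsigned vert_result)
{
   return vue_slot_to_offset(map->vert_result_to_slot[vert_result]);
}

int main(void)
{
   /* Hypothetical layout: vert_result 0 in slot 0, vert_result 3 in slot 2. */
   struct vue_map_sketch map = { .vert_result_to_slot = { [0] = 0, [3] = 2 } };

   assert(vert_result_to_offset(&map, 0) == 0);   /* slot 0 -> byte 0  */
   assert(vert_result_to_offset(&map, 3) == 32);  /* slot 2 -> byte 32 */
   return 0;
}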
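
The binding-table comment added by this patch fixes the new shared layout: draw buffers in slots 0..7, the VS and WM pull-constant buffers in slots 8 and 9, and textures from slot 10 upward. The following minimal standalone sketch mirrors the SURF_INDEX_* macros from the patch and checks those numbers; BRW_MAX_DRAW_BUFFERS is 8 as defined earlier in the header, while BRW_MAX_TEX_UNIT is assumed to be 16 here, matching the "Texture 0 .. Texture 15" rows of the comment, so the real upper bound may differ.

/*
 * Illustrative sketch only (not part of the patch): the shared binding-table
 * layout encoded by the SURF_INDEX_* macros.  The macro bodies are copied
 * from the patch; BRW_MAX_TEX_UNIT is an assumption for this sketch.
 */
#include <assert.h>
#include <stdio.h>

#define BRW_MAX_DRAW_BUFFERS 8
#define BRW_MAX_TEX_UNIT     16      /* assumption for this sketch */

#define SURF_INDEX_DRAW(d)           (d)
#define SURF_INDEX_VERT_CONST_BUFFER (BRW_MAX_DRAW_BUFFERS + 0)
#define SURF_INDEX_FRAG_CONST_BUFFER (BRW_MAX_DRAW_BUFFERS + 1)
#define SURF_INDEX_TEXTURE(t)        (BRW_MAX_DRAW_BUFFERS + 2 + (t))
#define BRW_MAX_SURFACES (BRW_MAX_DRAW_BUFFERS + BRW_MAX_TEX_UNIT + 2)

int main(void)
{
   /* Draw buffers stay at the front so RT 0 keeps headerless writes. */
   assert(SURF_INDEX_DRAW(0) == 0 && SURF_INDEX_DRAW(7) == 7);

   /* The two pull-constant buffers follow the draw buffers. */
   assert(SURF_INDEX_VERT_CONST_BUFFER == 8);
   assert(SURF_INDEX_FRAG_CONST_BUFFER == 9);

   /* Textures occupy the rest of the table: slots 10..25. */
   assert(SURF_INDEX_TEXTURE(0) == 10 && SURF_INDEX_TEXTURE(15) == 25);
   assert(BRW_MAX_SURFACES == 26);

   printf("binding table spans %d surfaces\n", BRW_MAX_SURFACES);
   return 0;
}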