X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fdrivers%2Fdri%2Fi965%2Fbrw_context.h;h=7e2675acf8f43bdcd80e02afe32f1647b67301f1;hb=2e5a1a2;hp=4b97bfb2ac54e41f4bb7fd03b62674c08cd7ab5a;hpb=8ba0c025a4e0aba97ae596e2121416cf04c0c300;p=mesa.git diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 4b97bfb2ac5..7e2675acf8f 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -120,32 +120,62 @@ struct brw_context; -#define BRW_NEW_URB_FENCE 0x1 -#define BRW_NEW_FRAGMENT_PROGRAM 0x2 -#define BRW_NEW_VERTEX_PROGRAM 0x4 -#define BRW_NEW_INPUT_DIMENSIONS 0x8 -#define BRW_NEW_CURBE_OFFSETS 0x10 -#define BRW_NEW_REDUCED_PRIMITIVE 0x20 -#define BRW_NEW_PRIMITIVE 0x40 -#define BRW_NEW_CONTEXT 0x80 -#define BRW_NEW_WM_INPUT_DIMENSIONS 0x100 -#define BRW_NEW_PSP 0x800 -#define BRW_NEW_WM_SURFACES 0x1000 -#define BRW_NEW_BINDING_TABLE 0x2000 -#define BRW_NEW_INDICES 0x4000 -#define BRW_NEW_VERTICES 0x8000 +enum brw_state_id { + BRW_STATE_URB_FENCE, + BRW_STATE_FRAGMENT_PROGRAM, + BRW_STATE_VERTEX_PROGRAM, + BRW_STATE_INPUT_DIMENSIONS, + BRW_STATE_CURBE_OFFSETS, + BRW_STATE_REDUCED_PRIMITIVE, + BRW_STATE_PRIMITIVE, + BRW_STATE_CONTEXT, + BRW_STATE_WM_INPUT_DIMENSIONS, + BRW_STATE_PSP, + BRW_STATE_WM_SURFACES, + BRW_STATE_VS_BINDING_TABLE, + BRW_STATE_GS_BINDING_TABLE, + BRW_STATE_PS_BINDING_TABLE, + BRW_STATE_INDICES, + BRW_STATE_VERTICES, + BRW_STATE_BATCH, + BRW_STATE_NR_WM_SURFACES, + BRW_STATE_NR_VS_SURFACES, + BRW_STATE_INDEX_BUFFER, + BRW_STATE_VS_CONSTBUF, + BRW_STATE_WM_CONSTBUF, + BRW_STATE_PROGRAM_CACHE, + BRW_STATE_STATE_BASE_ADDRESS, +}; + +#define BRW_NEW_URB_FENCE (1 << BRW_STATE_URB_FENCE) +#define BRW_NEW_FRAGMENT_PROGRAM (1 << BRW_STATE_FRAGMENT_PROGRAM) +#define BRW_NEW_VERTEX_PROGRAM (1 << BRW_STATE_VERTEX_PROGRAM) +#define BRW_NEW_INPUT_DIMENSIONS (1 << BRW_STATE_INPUT_DIMENSIONS) +#define BRW_NEW_CURBE_OFFSETS (1 << BRW_STATE_CURBE_OFFSETS) 
+#define BRW_NEW_REDUCED_PRIMITIVE (1 << BRW_STATE_REDUCED_PRIMITIVE) +#define BRW_NEW_PRIMITIVE (1 << BRW_STATE_PRIMITIVE) +#define BRW_NEW_CONTEXT (1 << BRW_STATE_CONTEXT) +#define BRW_NEW_WM_INPUT_DIMENSIONS (1 << BRW_STATE_WM_INPUT_DIMENSIONS) +#define BRW_NEW_PSP (1 << BRW_STATE_PSP) +#define BRW_NEW_WM_SURFACES (1 << BRW_STATE_WM_SURFACES) +#define BRW_NEW_VS_BINDING_TABLE (1 << BRW_STATE_VS_BINDING_TABLE) +#define BRW_NEW_GS_BINDING_TABLE (1 << BRW_STATE_GS_BINDING_TABLE) +#define BRW_NEW_PS_BINDING_TABLE (1 << BRW_STATE_PS_BINDING_TABLE) +#define BRW_NEW_INDICES (1 << BRW_STATE_INDICES) +#define BRW_NEW_VERTICES (1 << BRW_STATE_VERTICES) /** * Used for any batch entry with a relocated pointer that will be used * by any 3D rendering. */ -#define BRW_NEW_BATCH 0x10000 +#define BRW_NEW_BATCH (1 << BRW_STATE_BATCH) /** \see brw.state.depth_region */ -#define BRW_NEW_DEPTH_BUFFER 0x20000 -#define BRW_NEW_NR_WM_SURFACES 0x40000 -#define BRW_NEW_NR_VS_SURFACES 0x80000 -#define BRW_NEW_INDEX_BUFFER 0x100000 -#define BRW_NEW_VS_CONSTBUF 0x200000 -#define BRW_NEW_WM_CONSTBUF 0x400000 +#define BRW_NEW_NR_WM_SURFACES (1 << BRW_STATE_NR_WM_SURFACES) +#define BRW_NEW_NR_VS_SURFACES (1 << BRW_STATE_NR_VS_SURFACES) +#define BRW_NEW_INDEX_BUFFER (1 << BRW_STATE_INDEX_BUFFER) +#define BRW_NEW_VS_CONSTBUF (1 << BRW_STATE_VS_CONSTBUF) +#define BRW_NEW_WM_CONSTBUF (1 << BRW_STATE_WM_CONSTBUF) +#define BRW_NEW_PROGRAM_CACHE (1 << BRW_STATE_PROGRAM_CACHE) +#define BRW_NEW_STATE_BASE_ADDRESS (1 << BRW_STATE_STATE_BASE_ADDRESS) struct brw_state_flags { /** State update flags signalled by mesa internals */ @@ -158,12 +188,38 @@ struct brw_state_flags { GLuint cache; }; +enum state_struct_type { + AUB_TRACE_VS_STATE = 1, + AUB_TRACE_GS_STATE = 2, + AUB_TRACE_CLIP_STATE = 3, + AUB_TRACE_SF_STATE = 4, + AUB_TRACE_WM_STATE = 5, + AUB_TRACE_CC_STATE = 6, + AUB_TRACE_CLIP_VP_STATE = 7, + AUB_TRACE_SF_VP_STATE = 8, + AUB_TRACE_CC_VP_STATE = 0x9, + AUB_TRACE_SAMPLER_STATE = 0xa, + 
AUB_TRACE_KERNEL_INSTRUCTIONS = 0xb, + AUB_TRACE_SCRATCH_SPACE = 0xc, + AUB_TRACE_SAMPLER_DEFAULT_COLOR = 0xd, + + AUB_TRACE_SCISSOR_STATE = 0x15, + AUB_TRACE_BLEND_STATE = 0x16, + AUB_TRACE_DEPTH_STENCIL_STATE = 0x17, + + /* Not written to .aub files the same way the structures above are. */ + AUB_TRACE_NO_TYPE = 0x100, + AUB_TRACE_BINDING_TABLE = 0x101, + AUB_TRACE_SURFACE_STATE = 0x102, + AUB_TRACE_VS_CONSTANTS = 0x103, + AUB_TRACE_WM_CONSTANTS = 0x104, +}; /** Subclass of Mesa vertex program */ struct brw_vertex_program { struct gl_vertex_program program; GLuint id; - GLboolean use_const_buffer; + bool use_const_buffer; }; @@ -192,6 +248,7 @@ enum param_conversion { PARAM_CONVERT_F2I, PARAM_CONVERT_F2U, PARAM_CONVERT_F2B, + PARAM_CONVERT_ZERO, }; /* Data about a particular attempt to compile a program. Note that @@ -205,13 +262,13 @@ struct brw_wm_prog_data { GLuint first_curbe_grf; GLuint first_curbe_grf_16; - GLuint total_grf; - GLuint total_grf_16; + GLuint reg_blocks; + GLuint reg_blocks_16; GLuint total_scratch; GLuint nr_params; /**< number of float params/constants */ GLuint nr_pull_params; - GLboolean error; + bool error; int dispatch_width; uint32_t prog_offset_16; @@ -224,6 +281,75 @@ struct brw_wm_prog_data { enum param_conversion pull_param_convert[MAX_UNIFORMS * 4]; }; +/** + * Enum representing the i965-specific vertex results that don't correspond + * exactly to any element of gl_vert_result. The values of this enum are + * assigned such that they don't conflict with gl_vert_result. + */ +typedef enum +{ + BRW_VERT_RESULT_NDC = VERT_RESULT_MAX, + BRW_VERT_RESULT_HPOS_DUPLICATE, + BRW_VERT_RESULT_PAD, + BRW_VERT_RESULT_MAX +} brw_vert_result; + + +/** + * Data structure recording the relationship between the gl_vert_result enum + * and "slots" within the vertex URB entry (VUE). A "slot" is defined as a + * single octaword within the VUE (128 bits). 
+ * + * Note that each BRW register contains 256 bits (2 octawords), so when + * accessing the VUE in URB_NOSWIZZLE mode, each register corresponds to two + * consecutive VUE slots. When accessing the VUE in URB_INTERLEAVED mode (as + * in a vertex shader), each register corresponds to a single VUE slot, since + * it contains data for two separate vertices. + */ +struct brw_vue_map { + /** + * Map from gl_vert_result value to VUE slot. For gl_vert_results that are + * not stored in a slot (because they are not written, or because + * additional processing is applied before storing them in the VUE), the + * value is -1. + */ + int vert_result_to_slot[BRW_VERT_RESULT_MAX]; + + /** + * Map from VUE slot to gl_vert_result value. For slots that do not + * directly correspond to a gl_vert_result, the value comes from + * brw_vert_result. + * + * For slots that are not in use, the value is BRW_VERT_RESULT_MAX (this + * simplifies code that uses the value stored in slot_to_vert_result to + * create a bit mask). + */ + int slot_to_vert_result[BRW_VERT_RESULT_MAX]; + + /** + * Total number of VUE slots in use + */ + int num_slots; +}; + +/** + * Convert a VUE slot number into a byte offset within the VUE. + */ +static inline GLuint brw_vue_slot_to_offset(GLuint slot) +{ + return 16*slot; +} + +/** + * Convert a vert_result into a byte offset within the VUE. 
+ */ +static inline GLuint brw_vert_result_to_offset(struct brw_vue_map *vue_map, + GLuint vert_result) +{ + return brw_vue_slot_to_offset(vue_map->vert_result_to_slot[vert_result]); +} + + struct brw_sf_prog_data { GLuint urb_read_length; GLuint total_grf; @@ -255,12 +381,19 @@ struct brw_vs_prog_data { GLuint total_grf; GLbitfield64 outputs_written; GLuint nr_params; /**< number of float params/constants */ + GLuint nr_pull_params; /**< number of dwords referenced by pull_param[] */ + GLuint total_scratch; GLuint inputs_read; /* Used for calculating urb partitions: */ GLuint urb_entry_size; + + const float *param[MAX_UNIFORMS * 4]; /* should be: BRW_MAX_CURBE */ + const float *pull_param[MAX_UNIFORMS * 4]; + + bool uses_new_param_layout; }; @@ -337,11 +470,11 @@ struct brw_cache_item { /** 32-bit hash of the key data */ GLuint hash; GLuint key_size; /* for variable-sized keys */ + GLuint aux_size; const void *key; - drm_intel_bo **reloc_bufs; - GLuint nr_reloc_bufs; - drm_intel_bo *bo; + uint32_t offset; + uint32_t size; struct brw_cache_item *next; }; @@ -352,14 +485,11 @@ struct brw_cache { struct brw_context *brw; struct brw_cache_item **items; + drm_intel_bo *bo; GLuint size, n_items; - char *name[BRW_MAX_CACHE]; - - /* Record of the last BOs chosen for each cache_id. Used to set - * brw->state.dirty.cache when a new cache item is chosen. - */ - drm_intel_bo *last_bo[BRW_MAX_CACHE]; + uint32_t next_offset; + bool bo_used_by_gpu; }; @@ -453,39 +583,18 @@ struct brw_query_object { struct brw_context { struct intel_context intel; /**< base class, must be first field */ - GLuint primitive; + GLuint primitive; /**< Hardware primitive, such as _3DPRIM_TRILIST. 
*/ - GLboolean emit_state_always; - GLboolean has_surface_tile_offset; - GLboolean has_compr4; - GLboolean has_negative_rhw_bug; - GLboolean has_aa_line_parameters; - GLboolean has_pln; + bool emit_state_always; + bool has_surface_tile_offset; + bool has_compr4; + bool has_negative_rhw_bug; + bool has_aa_line_parameters; + bool has_pln; + bool new_vs_backend; struct { struct brw_state_flags dirty; - - /** - * \name Cached region pointers - * - * When the draw buffer is updated, often the depth buffer is not - * changed. Caching the pointer to the buffer's region allows us to - * detect when the buffer has in fact changed, and allows us to avoid - * updating the buffer's GPU state when it has not. - * - * The original of each cached pointer is an instance of - * \c intel_renderbuffer.region. - * - * \see brw_set_draw_region() - * - * \{ - */ - - /** \see struct brw_tracked_state brw_depthbuffer */ - struct intel_region *depth_region; - - /** \} */ - /** * List of buffers accumulated in brw_validate_state to receive * drm_intel_bo_check_aperture treatment before exec, so we can @@ -498,7 +607,7 @@ struct brw_context * the CURBE, the depth buffer, and a query BO. */ drm_intel_bo *validated_bos[VERT_ATTRIB_MAX + BRW_WM_MAX_SURF + 16]; - int validated_bo_count; + unsigned int validated_bo_count; } state; struct brw_cache cache; @@ -567,10 +676,10 @@ struct brw_context GLuint csize; /* constant buffer size in urb registers */ GLuint sfsize; /* setup data size in urb registers */ - GLboolean constrained; + bool constrained; - GLuint max_vs_handles; /* Maximum number of VS handles */ - GLuint max_gs_handles; /* Maximum number of GS handles */ + GLuint max_vs_entries; /* Maximum number of VS entries */ + GLuint max_gs_entries; /* Maximum number of GS entries */ GLuint nr_vs_entries; GLuint nr_gs_entries; @@ -581,6 +690,8 @@ struct brw_context /* gen6: * The length of each URB entry owned by the VS (or GS), as * a number of 1024-bit (128-byte) rows. Should be >= 1. 
+ * + * gen7: Same meaning, but in 512-bit (64-byte) rows. */ GLuint vs_size; GLuint gs_size; @@ -630,8 +741,10 @@ struct brw_context struct brw_vs_prog_data *prog_data; int8_t *constant_map; /* variable array following prog_data */ - drm_intel_bo *prog_bo; + drm_intel_bo *scratch_bo; drm_intel_bo *const_bo; + /** Offset in the program cache to the VS program */ + uint32_t prog_offset; uint32_t state_offset; /** Binding table of pointers to surf_bo entries */ @@ -641,20 +754,39 @@ struct brw_context uint32_t push_const_offset; /* Offset in the batchbuffer */ int push_const_size; /* in 256-bit register increments */ + + /** @{ register allocator */ + + struct ra_regs *regs; + + /** + * Array of the ra classes for the unaligned contiguous register + * block sizes used. + */ + int *classes; + + /** + * Mapping for register-allocated objects in *regs to the first + * GRF for that object. + */ + uint8_t *ra_reg_to_grf; + /** @} */ } vs; struct { struct brw_gs_prog_data *prog_data; - GLboolean prog_active; - drm_intel_bo *prog_bo; - drm_intel_bo *state_bo; + bool prog_active; + /** Offset in the program cache to the GS program */ + uint32_t prog_offset; + uint32_t state_offset; } gs; struct { struct brw_clip_prog_data *prog_data; - drm_intel_bo *prog_bo; + /** Offset in the program cache to the CLIP program pre-gen6 */ + uint32_t prog_offset; /* Offset in the batch to the CLIP state on pre-gen6. 
*/ uint32_t state_offset; @@ -669,7 +801,8 @@ struct brw_context struct { struct brw_sf_prog_data *prog_data; - drm_intel_bo *prog_bo; + /** Offset in the program cache to the SF program */ + uint32_t prog_offset; uint32_t state_offset; uint32_t vp_offset; } sf; @@ -690,18 +823,19 @@ struct brw_context GLuint render_surf; GLuint nr_surfaces; - GLuint max_threads; drm_intel_bo *scratch_bo; GLuint sampler_count; uint32_t sampler_offset; + /** Offset in the program cache to the WM program */ + uint32_t prog_offset; + /** Binding table of pointers to surf_bo entries */ uint32_t bind_bo_offset; uint32_t surf_offset[BRW_WM_MAX_SURF]; uint32_t state_offset; /* offset in batchbuffer to pre-gen6 WM state */ - drm_intel_bo *prog_bo; drm_intel_bo *const_bo; /* pull constant buffer. */ /** * This is offset in the batch to the push constants on gen6. @@ -709,13 +843,33 @@ struct brw_context * Pre-gen6, push constants live in the CURBE. */ uint32_t push_const_offset; + + /** @{ register allocator */ + + struct ra_regs *regs; + + /** Array of the ra classes for the unaligned contiguous + * register block sizes used. + */ + int *classes; + + /** + * Mapping for register-allocated objects in *regs to the first + * GRF for that object. + */ + uint8_t *ra_reg_to_grf; + + /** + * ra class for the aligned pairs we use for PLN, which doesn't + * appear in *classes. 
+ */ + int aligned_pairs_class; + + /** @} */ } wm; struct { - /* gen4 */ - drm_intel_bo *prog_bo; - uint32_t state_offset; uint32_t blend_state_offset; uint32_t depth_stencil_state_offset; @@ -726,7 +880,7 @@ struct brw_context struct brw_query_object *obj; drm_intel_bo *bo; int index; - GLboolean active; + bool active; } query; /* Used to give every program string a unique id */ @@ -734,16 +888,25 @@ struct brw_context int num_prepare_atoms, num_emit_atoms; struct brw_tracked_state prepare_atoms[64], emit_atoms[64]; + + /* If (INTEL_DEBUG & DEBUG_BATCH) */ + struct { + uint32_t offset; + uint32_t size; + enum state_struct_type type; + } *state_batch_list; + int state_batch_count; }; + #define BRW_PACKCOLOR8888(r,g,b,a) ((r<<24) | (g<<16) | (b<<8) | a) struct brw_instruction_info { char *name; int nsrc; int ndst; - GLboolean is_arith; + bool is_arith; }; extern const struct brw_instruction_info brw_opcodes[128]; @@ -755,10 +918,10 @@ void brwInitVtbl( struct brw_context *brw ); /*====================================================================== * brw_context.c */ -GLboolean brwCreateContext( int api, - const struct gl_config *mesaVis, - __DRIcontext *driContextPriv, - void *sharedContextPrivate); +bool brwCreateContext(int api, + const struct gl_config *mesaVis, + __DRIcontext *driContextPriv, + void *sharedContextPrivate); /*====================================================================== * brw_queryobj.c @@ -784,6 +947,10 @@ void brw_validate_textures( struct brw_context *brw ); */ void brwInitFragProgFuncs( struct dd_function_table *functions ); +int brw_get_scratch_size(int size); +void brw_get_scratch_bo(struct intel_context *intel, + drm_intel_bo **scratch_bo, int size); + /* brw_urb.c */ @@ -796,6 +963,14 @@ void brw_upload_cs_urb_state(struct brw_context *brw); /* brw_disasm.c */ int brw_disasm (FILE *file, struct brw_instruction *inst, int gen); +/* brw_vs.c */ +void brw_compute_vue_map(struct brw_vue_map *vue_map, + const struct intel_context 
*intel, + bool userclip_active, + GLbitfield64 outputs_written); +gl_clip_plane *brw_select_clip_planes(struct gl_context *ctx); + + /*====================================================================== * Inline conversion functions. These are better-typed than the * macros used previously: @@ -831,7 +1006,7 @@ brw_fragment_program_const(const struct gl_fragment_program *p) } static inline -float convert_param(enum param_conversion conversion, float param) +float convert_param(enum param_conversion conversion, const float *param) { union { float f; @@ -841,24 +1016,57 @@ float convert_param(enum param_conversion conversion, float param) switch (conversion) { case PARAM_NO_CONVERT: - return param; + return *param; case PARAM_CONVERT_F2I: - fi.i = param; + fi.i = *param; return fi.f; case PARAM_CONVERT_F2U: - fi.u = param; + fi.u = *param; return fi.f; case PARAM_CONVERT_F2B: - if (param != 0.0) + if (*param != 0.0) fi.i = 1; else fi.i = 0; return fi.f; + case PARAM_CONVERT_ZERO: + return 0.0; default: - return param; + return *param; } } -GLboolean brw_do_cubemap_normalize(struct exec_list *instructions); +/** + * Pre-gen6, the register file of the EUs was shared between threads, + * and each thread used some subset allocated on a 16-register block + * granularity. The unit states wanted these block counts. + */ +static inline int +brw_register_blocks(int reg_count) +{ + return ALIGN(reg_count, 16) / 16 - 1; +} + +static inline uint32_t +brw_program_reloc(struct brw_context *brw, uint32_t state_offset, + uint32_t prog_offset) +{ + struct intel_context *intel = &brw->intel; + + if (intel->gen >= 5) { + /* Using state base address. */ + return prog_offset; + } + + drm_intel_bo_emit_reloc(intel->batch.bo, + state_offset, + brw->cache.bo, + prog_offset, + I915_GEM_DOMAIN_INSTRUCTION, 0); + + return brw->cache.bo->offset + prog_offset; +} + +bool brw_do_cubemap_normalize(struct exec_list *instructions); #endif