X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fdrivers%2Fdri%2Fi965%2Fbrw_context.h;h=fa2fc72e435e41568cedc999a77bcad010464492;hb=77a18428fffc938a4e3fa9b592e3e104dda0fe7f;hp=7069724466ab5f4e7693eac4757849143df8657a;hpb=652901e95b4ed406293d0e1fabee857c054119b1;p=mesa.git diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 7069724466a..fa2fc72e435 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -120,32 +120,58 @@ struct brw_context; -#define BRW_NEW_URB_FENCE 0x1 -#define BRW_NEW_FRAGMENT_PROGRAM 0x2 -#define BRW_NEW_VERTEX_PROGRAM 0x4 -#define BRW_NEW_INPUT_DIMENSIONS 0x8 -#define BRW_NEW_CURBE_OFFSETS 0x10 -#define BRW_NEW_REDUCED_PRIMITIVE 0x20 -#define BRW_NEW_PRIMITIVE 0x40 -#define BRW_NEW_CONTEXT 0x80 -#define BRW_NEW_WM_INPUT_DIMENSIONS 0x100 -#define BRW_NEW_PSP 0x800 -#define BRW_NEW_WM_SURFACES 0x1000 -#define BRW_NEW_BINDING_TABLE 0x2000 -#define BRW_NEW_INDICES 0x4000 -#define BRW_NEW_VERTICES 0x8000 +enum brw_state_id { + BRW_STATE_URB_FENCE, + BRW_STATE_FRAGMENT_PROGRAM, + BRW_STATE_VERTEX_PROGRAM, + BRW_STATE_INPUT_DIMENSIONS, + BRW_STATE_CURBE_OFFSETS, + BRW_STATE_REDUCED_PRIMITIVE, + BRW_STATE_PRIMITIVE, + BRW_STATE_CONTEXT, + BRW_STATE_WM_INPUT_DIMENSIONS, + BRW_STATE_PSP, + BRW_STATE_WM_SURFACES, + BRW_STATE_VS_BINDING_TABLE, + BRW_STATE_GS_BINDING_TABLE, + BRW_STATE_PS_BINDING_TABLE, + BRW_STATE_INDICES, + BRW_STATE_VERTICES, + BRW_STATE_BATCH, + BRW_STATE_NR_WM_SURFACES, + BRW_STATE_NR_VS_SURFACES, + BRW_STATE_INDEX_BUFFER, + BRW_STATE_VS_CONSTBUF, + BRW_STATE_PROGRAM_CACHE, + BRW_STATE_STATE_BASE_ADDRESS, +}; + +#define BRW_NEW_URB_FENCE (1 << BRW_STATE_URB_FENCE) +#define BRW_NEW_FRAGMENT_PROGRAM (1 << BRW_STATE_FRAGMENT_PROGRAM) +#define BRW_NEW_VERTEX_PROGRAM (1 << BRW_STATE_VERTEX_PROGRAM) +#define BRW_NEW_INPUT_DIMENSIONS (1 << BRW_STATE_INPUT_DIMENSIONS) +#define BRW_NEW_CURBE_OFFSETS (1 << BRW_STATE_CURBE_OFFSETS) 
+#define BRW_NEW_REDUCED_PRIMITIVE (1 << BRW_STATE_REDUCED_PRIMITIVE) +#define BRW_NEW_PRIMITIVE (1 << BRW_STATE_PRIMITIVE) +#define BRW_NEW_CONTEXT (1 << BRW_STATE_CONTEXT) +#define BRW_NEW_WM_INPUT_DIMENSIONS (1 << BRW_STATE_WM_INPUT_DIMENSIONS) +#define BRW_NEW_PSP (1 << BRW_STATE_PSP) +#define BRW_NEW_WM_SURFACES (1 << BRW_STATE_WM_SURFACES) +#define BRW_NEW_VS_BINDING_TABLE (1 << BRW_STATE_VS_BINDING_TABLE) +#define BRW_NEW_GS_BINDING_TABLE (1 << BRW_STATE_GS_BINDING_TABLE) +#define BRW_NEW_PS_BINDING_TABLE (1 << BRW_STATE_PS_BINDING_TABLE) +#define BRW_NEW_INDICES (1 << BRW_STATE_INDICES) +#define BRW_NEW_VERTICES (1 << BRW_STATE_VERTICES) /** * Used for any batch entry with a relocated pointer that will be used * by any 3D rendering. */ -#define BRW_NEW_BATCH 0x10000 -/** brw->depth_region updated */ -#define BRW_NEW_DEPTH_BUFFER 0x20000 -#define BRW_NEW_NR_WM_SURFACES 0x40000 -#define BRW_NEW_NR_VS_SURFACES 0x80000 -#define BRW_NEW_INDEX_BUFFER 0x100000 -#define BRW_NEW_VS_CONSTBUF 0x200000 -#define BRW_NEW_WM_CONSTBUF 0x200000 +#define BRW_NEW_BATCH (1 << BRW_STATE_BATCH) +/** \see brw.state.depth_region */ +#define BRW_NEW_INDEX_BUFFER (1 << BRW_STATE_INDEX_BUFFER) +#define BRW_NEW_VS_CONSTBUF (1 << BRW_STATE_VS_CONSTBUF) +#define BRW_NEW_PROGRAM_CACHE (1 << BRW_STATE_PROGRAM_CACHE) +#define BRW_NEW_STATE_BASE_ADDRESS (1 << BRW_STATE_STATE_BASE_ADDRESS) struct brw_state_flags { /** State update flags signalled by mesa internals */ @@ -158,12 +184,38 @@ struct brw_state_flags { GLuint cache; }; +enum state_struct_type { + AUB_TRACE_VS_STATE = 1, + AUB_TRACE_GS_STATE = 2, + AUB_TRACE_CLIP_STATE = 3, + AUB_TRACE_SF_STATE = 4, + AUB_TRACE_WM_STATE = 5, + AUB_TRACE_CC_STATE = 6, + AUB_TRACE_CLIP_VP_STATE = 7, + AUB_TRACE_SF_VP_STATE = 8, + AUB_TRACE_CC_VP_STATE = 0x9, + AUB_TRACE_SAMPLER_STATE = 0xa, + AUB_TRACE_KERNEL_INSTRUCTIONS = 0xb, + AUB_TRACE_SCRATCH_SPACE = 0xc, + AUB_TRACE_SAMPLER_DEFAULT_COLOR = 0xd, + + AUB_TRACE_SCISSOR_STATE = 0x15, + 
AUB_TRACE_BLEND_STATE = 0x16, + AUB_TRACE_DEPTH_STENCIL_STATE = 0x17, + + /* Not written to .aub files the same way the structures above are. */ + AUB_TRACE_NO_TYPE = 0x100, + AUB_TRACE_BINDING_TABLE = 0x101, + AUB_TRACE_SURFACE_STATE = 0x102, + AUB_TRACE_VS_CONSTANTS = 0x103, + AUB_TRACE_WM_CONSTANTS = 0x104, +}; /** Subclass of Mesa vertex program */ struct brw_vertex_program { struct gl_vertex_program program; GLuint id; - GLboolean use_const_buffer; + bool use_const_buffer; }; @@ -171,9 +223,6 @@ struct brw_vertex_program { struct brw_fragment_program { struct gl_fragment_program program; GLuint id; /**< serial no. to identify frag progs, never re-used */ - - /** for debugging, which texture units are referenced */ - GLbitfield tex_units_used; }; struct brw_shader { @@ -192,6 +241,7 @@ enum param_conversion { PARAM_CONVERT_F2I, PARAM_CONVERT_F2U, PARAM_CONVERT_F2B, + PARAM_CONVERT_ZERO, }; /* Data about a particular attempt to compile a program. Note that @@ -204,13 +254,16 @@ struct brw_wm_prog_data { GLuint urb_read_length; GLuint first_curbe_grf; - GLuint total_grf; + GLuint first_curbe_grf_16; + GLuint reg_blocks; + GLuint reg_blocks_16; GLuint total_scratch; GLuint nr_params; /**< number of float params/constants */ GLuint nr_pull_params; - GLboolean error; + bool error; int dispatch_width; + uint32_t prog_offset_16; /* Pointer to tracked values (only valid once * _mesa_load_state_parameters has been called at runtime). @@ -221,6 +274,75 @@ struct brw_wm_prog_data { enum param_conversion pull_param_convert[MAX_UNIFORMS * 4]; }; +/** + * Enum representing the i965-specific vertex results that don't correspond + * exactly to any element of gl_vert_result. The values of this enum are + * assigned such that they don't conflict with gl_vert_result. 
+ */ +typedef enum +{ + BRW_VERT_RESULT_NDC = VERT_RESULT_MAX, + BRW_VERT_RESULT_HPOS_DUPLICATE, + BRW_VERT_RESULT_PAD, + BRW_VERT_RESULT_MAX +} brw_vert_result; + + +/** + * Data structure recording the relationship between the gl_vert_result enum + * and "slots" within the vertex URB entry (VUE). A "slot" is defined as a + * single octaword within the VUE (128 bits). + * + * Note that each BRW register contains 256 bits (2 octawords), so when + * accessing the VUE in URB_NOSWIZZLE mode, each register corresponds to two + * consecutive VUE slots. When accessing the VUE in URB_INTERLEAVED mode (as + * in a vertex shader), each register corresponds to a single VUE slot, since + * it contains data for two separate vertices. + */ +struct brw_vue_map { + /** + * Map from gl_vert_result value to VUE slot. For gl_vert_results that are + * not stored in a slot (because they are not written, or because + * additional processing is applied before storing them in the VUE), the + * value is -1. + */ + int vert_result_to_slot[BRW_VERT_RESULT_MAX]; + + /** + * Map from VUE slot to gl_vert_result value. For slots that do not + * directly correspond to a gl_vert_result, the value comes from + * brw_vert_result. + * + * For slots that are not in use, the value is BRW_VERT_RESULT_MAX (this + * simplifies code that uses the value stored in slot_to_vert_result to + * create a bit mask). + */ + int slot_to_vert_result[BRW_VERT_RESULT_MAX]; + + /** + * Total number of VUE slots in use + */ + int num_slots; +}; + +/** + * Convert a VUE slot number into a byte offset within the VUE. + */ +static inline GLuint brw_vue_slot_to_offset(GLuint slot) +{ + return 16*slot; +} + +/** + * Convert a vert_result into a byte offset within the VUE. 
+ */ +static inline GLuint brw_vert_result_to_offset(struct brw_vue_map *vue_map, + GLuint vert_result) +{ + return brw_vue_slot_to_offset(vue_map->vert_result_to_slot[vert_result]); +} + + struct brw_sf_prog_data { GLuint urb_read_length; GLuint total_grf; @@ -252,12 +374,20 @@ struct brw_vs_prog_data { GLuint total_grf; GLbitfield64 outputs_written; GLuint nr_params; /**< number of float params/constants */ + GLuint nr_pull_params; /**< number of dwords referenced by pull_param[] */ + GLuint total_scratch; GLuint inputs_read; /* Used for calculating urb partitions: */ GLuint urb_entry_size; + + const float *param[MAX_UNIFORMS * 4]; /* should be: BRW_MAX_CURBE */ + const float *pull_param[MAX_UNIFORMS * 4]; + + bool uses_new_param_layout; + bool uses_vertexid; }; @@ -275,31 +405,48 @@ struct brw_vs_ouput_sizes { #define BRW_MAX_DRAW_BUFFERS 8 /** - * Size of our surface binding table for the WM. - * This contains pointers to the drawing surfaces and current texture - * objects and shader constant buffers (+2). - */ -#define BRW_WM_MAX_SURF (BRW_MAX_DRAW_BUFFERS + BRW_MAX_TEX_UNIT + 1) - -/** - * Helpers to convert drawing buffers, textures and constant buffers - * to surface binding table indexes, for WM. + * Helpers to create Surface Binding Table indexes for draw buffers, + * textures, and constant buffers. + * + * Shader threads access surfaces via numeric handles, rather than directly + * using pointers. The binding table maps these numeric handles to the + * address of the actual buffer. + * + * For example, a shader might ask to sample from "surface 7." In this case, + * bind[7] would contain a pointer to a texture. + * + * Although the hardware supports separate binding tables per pipeline stage + * (VS, HS, DS, GS, PS), we currently share a single binding table for all of + * them. This is purely for convenience. 
+ * + * Currently our binding tables are (arbitrarily) programmed as follows: + * + * +-------------------------------+ + * | 0 | Draw buffer 0 | . + * | . | . | \ + * | : | : | > Only relevant to the WM. + * | 7 | Draw buffer 7 | / + * |-----|-------------------------| ` + * | 8 | VS Pull Constant Buffer | + * | 9 | WM Pull Constant Buffer | + * |-----|-------------------------| + * | 10 | Texture 0 | + * | . | . | + * | : | : | + * | 25 | Texture 15 | + * +-------------------------------+ + * + * Note that nothing actually uses the SURF_INDEX_DRAW macro, so it has to be + * the identity function or things will break. We do want to keep draw buffers + * first so we can use headerless render target writes for RT 0. */ #define SURF_INDEX_DRAW(d) (d) -#define SURF_INDEX_FRAG_CONST_BUFFER (BRW_MAX_DRAW_BUFFERS) -#define SURF_INDEX_TEXTURE(t) (BRW_MAX_DRAW_BUFFERS + 1 + (t)) - -/** - * Size of surface binding table for the VS. - * Only one constant buffer for now. - */ -#define BRW_VS_MAX_SURF 1 - -/** - * Only a VS constant buffer - */ -#define SURF_INDEX_VERT_CONST_BUFFER 0 +#define SURF_INDEX_VERT_CONST_BUFFER (BRW_MAX_DRAW_BUFFERS + 0) +#define SURF_INDEX_FRAG_CONST_BUFFER (BRW_MAX_DRAW_BUFFERS + 1) +#define SURF_INDEX_TEXTURE(t) (BRW_MAX_DRAW_BUFFERS + 2 + (t)) +/** Maximum size of the binding table. 
*/ +#define BRW_MAX_SURFACES (BRW_MAX_DRAW_BUFFERS + BRW_MAX_TEX_UNIT + 2) enum brw_cache_id { BRW_BLEND_STATE, @@ -308,7 +455,6 @@ enum brw_cache_id { BRW_CC_VP, BRW_CC_UNIT, BRW_WM_PROG, - BRW_SAMPLER_DEFAULT_COLOR, BRW_SAMPLER, BRW_WM_UNIT, BRW_SF_PROG, @@ -335,11 +481,11 @@ struct brw_cache_item { /** 32-bit hash of the key data */ GLuint hash; GLuint key_size; /* for variable-sized keys */ + GLuint aux_size; const void *key; - drm_intel_bo **reloc_bufs; - GLuint nr_reloc_bufs; - drm_intel_bo *bo; + uint32_t offset; + uint32_t size; struct brw_cache_item *next; }; @@ -350,14 +496,11 @@ struct brw_cache { struct brw_context *brw; struct brw_cache_item **items; + drm_intel_bo *bo; GLuint size, n_items; - char *name[BRW_MAX_CACHE]; - - /* Record of the last BOs chosen for each cache_id. Used to set - * brw->state.dirty.cache when a new cache item is chosen. - */ - drm_intel_bo *last_bo[BRW_MAX_CACHE]; + uint32_t next_offset; + bool bo_used_by_gpu; }; @@ -369,7 +512,6 @@ struct brw_cache { */ struct brw_tracked_state { struct brw_state_flags dirty; - void (*prepare)( struct brw_context *brw ); void (*emit)( struct brw_context *brw ); }; @@ -381,7 +523,6 @@ struct brw_tracked_state { #define CACHE_NEW_CC_VP (1<= 1. + * + * gen7: Same meaning, but in 512-bit (64-byte) rows. 
+ */ GLuint vs_size; -/* GLuint gs_size; */ -/* GLuint clip_size; */ -/* GLuint sf_size; */ -/* GLuint cs_size; */ + GLuint gs_size; GLuint vs_start; GLuint gs_start; @@ -603,45 +740,81 @@ struct brw_context GLuint last_bufsz; } curbe; + struct { + /** Binding table of pointers to surf_bo entries */ + uint32_t bo_offset; + uint32_t surf_offset[BRW_MAX_SURFACES]; + } bind; + + /** SAMPLER_STATE count and offset */ + struct { + GLuint count; + uint32_t offset; + } sampler; + struct { struct brw_vs_prog_data *prog_data; int8_t *constant_map; /* variable array following prog_data */ - drm_intel_bo *prog_bo; - drm_intel_bo *state_bo; + drm_intel_bo *scratch_bo; drm_intel_bo *const_bo; + /** Offset in the program cache to the VS program */ + uint32_t prog_offset; + uint32_t state_offset; - /** Binding table of pointers to surf_bo entries */ - drm_intel_bo *bind_bo; - uint32_t bind_bo_offset; - drm_intel_bo *surf_bo[BRW_VS_MAX_SURF]; - uint32_t surf_offset[BRW_VS_MAX_SURF]; - GLuint nr_surfaces; + uint32_t push_const_offset; /* Offset in the batchbuffer */ + int push_const_size; /* in 256-bit register increments */ + + /** @{ register allocator */ + + struct ra_regs *regs; + + /** + * Array of the ra classes for the unaligned contiguous register + * block sizes used. + */ + int *classes; + + /** + * Mapping for register-allocated objects in *regs to the first + * GRF for that object. + */ + uint8_t *ra_reg_to_grf; + /** @} */ } vs; struct { struct brw_gs_prog_data *prog_data; - GLboolean prog_active; - drm_intel_bo *prog_bo; - drm_intel_bo *state_bo; + bool prog_active; + /** Offset in the program cache to the GS program */ + uint32_t prog_offset; + uint32_t state_offset; } gs; struct { struct brw_clip_prog_data *prog_data; - drm_intel_bo *prog_bo; - drm_intel_bo *state_bo; - drm_intel_bo *vp_bo; + /** Offset in the program cache to the CLIP program pre-gen6 */ + uint32_t prog_offset; + + /* Offset in the batch to the CLIP state on pre-gen6. 
*/ + uint32_t state_offset; + + /* As of gen6, this is the offset in the batch to the CLIP VP, + * instead of vp_bo. + */ + uint32_t vp_offset; } clip; struct { struct brw_sf_prog_data *prog_data; - drm_intel_bo *prog_bo; - drm_intel_bo *state_bo; - drm_intel_bo *vp_bo; + /** Offset in the program cache to the SF program pre-gen6 */ + uint32_t prog_offset; + uint32_t state_offset; + uint32_t vp_offset; } sf; struct { @@ -653,69 +826,125 @@ struct brw_context */ GLbitfield input_size_masks[4]; - /** Array of surface default colors (texture border color) */ - drm_intel_bo *sdc_bo[BRW_MAX_TEX_UNIT]; + /** offsets in the batch to sampler default colors (texture border color) + */ + uint32_t sdc_offset[BRW_MAX_TEX_UNIT]; GLuint render_surf; - GLuint nr_surfaces; - GLuint max_threads; drm_intel_bo *scratch_bo; - GLuint sampler_count; - drm_intel_bo *sampler_bo; + /** Offset in the program cache to the WM program */ + uint32_t prog_offset; - /** Binding table of pointers to surf_bo entries */ - drm_intel_bo *bind_bo; - uint32_t bind_bo_offset; - drm_intel_bo *surf_bo[BRW_WM_MAX_SURF]; - uint32_t surf_offset[BRW_WM_MAX_SURF]; + uint32_t state_offset; /* offset in batchbuffer to pre-gen6 WM state */ - drm_intel_bo *prog_bo; - drm_intel_bo *state_bo; drm_intel_bo *const_bo; /* pull constant buffer. */ /** - * This is the push constant BO on gen6. + * This is the offset in the batch to the push constants on gen6. * * Pre-gen6, push constants live in the CURBE. */ - drm_intel_bo *push_const_bo; - } wm; + uint32_t push_const_offset; + /** @{ register allocator */ - struct { - /* gen4 */ - drm_intel_bo *prog_bo; - drm_intel_bo *vp_bo; + struct ra_regs *regs; + + /** Array of the ra classes for the unaligned contiguous + * register block sizes used. + */ + int *classes; + + /** + * Mapping for register-allocated objects in *regs to the first + * GRF for that object. 
+ */ + uint8_t *ra_reg_to_grf; + + /** + * ra class for the aligned pairs we use for PLN, which doesn't + * appear in *classes. + */ + int aligned_pairs_class; + + /** @} */ + } wm; - /* gen6 */ - drm_intel_bo *blend_state_bo; - drm_intel_bo *depth_stencil_state_bo; - drm_intel_bo *color_calc_state_bo; - drm_intel_bo *state_bo; + struct { uint32_t state_offset; + uint32_t blend_state_offset; + uint32_t depth_stencil_state_offset; + uint32_t vp_offset; } cc; struct { struct brw_query_object *obj; drm_intel_bo *bo; int index; - GLboolean active; + bool active; } query; /* Used to give every program string a unique id */ GLuint program_id; + + int num_atoms; + const struct brw_tracked_state **atoms; + + /* If (INTEL_DEBUG & DEBUG_BATCH) */ + struct { + uint32_t offset; + uint32_t size; + enum state_struct_type type; + } *state_batch_list; + int state_batch_count; + + /** + * \brief State needed to execute HiZ meta-ops + * + * All fields except \c op are initialized by gen6_hiz_init(). + */ + struct brw_hiz_state { + /** + * \brief Indicates which HiZ operation is in progress. + * + * See the following sections of the Sandy Bridge PRM, Volume 1, Part2: + * - 7.5.3.1 Depth Buffer Clear + * - 7.5.3.2 Depth Buffer Resolve + * - 7.5.3.3 Hierarchical Depth Buffer Resolve + */ + enum brw_hiz_op { + BRW_HIZ_OP_NONE = 0, + BRW_HIZ_OP_DEPTH_CLEAR, + BRW_HIZ_OP_DEPTH_RESOLVE, + BRW_HIZ_OP_HIZ_RESOLVE, + } op; + + /** \brief Shader state */ + struct { + GLuint program; + GLuint position_vbo; + GLint position_location; + } shader; + + /** \brief VAO for the rectangle primitive's vertices. 
*/ + GLuint vao; + + GLuint fbo; + struct gl_renderbuffer *depth_rb; + } hiz; }; + #define BRW_PACKCOLOR8888(r,g,b,a) ((r<<24) | (g<<16) | (b<<8) | a) struct brw_instruction_info { char *name; int nsrc; int ndst; - GLboolean is_arith; + bool is_arith; }; extern const struct brw_instruction_info brw_opcodes[128]; @@ -727,10 +956,10 @@ void brwInitVtbl( struct brw_context *brw ); /*====================================================================== * brw_context.c */ -GLboolean brwCreateContext( int api, - const struct gl_config *mesaVis, - __DRIcontext *driContextPriv, - void *sharedContextPrivate); +bool brwCreateContext(int api, + const struct gl_config *mesaVis, + __DRIcontext *driContextPriv, + void *sharedContextPrivate); /*====================================================================== * brw_queryobj.c @@ -756,14 +985,15 @@ void brw_validate_textures( struct brw_context *brw ); */ void brwInitFragProgFuncs( struct dd_function_table *functions ); +int brw_get_scratch_size(int size); +void brw_get_scratch_bo(struct intel_context *intel, + drm_intel_bo **scratch_bo, int size); + /* brw_urb.c */ void brw_upload_urb_fence(struct brw_context *brw); -/* brw_cc.c */ -void brw_update_cc_vp(struct brw_context *brw); - /* brw_curbe.c */ void brw_upload_cs_urb_state(struct brw_context *brw); @@ -771,9 +1001,23 @@ void brw_upload_cs_urb_state(struct brw_context *brw); /* brw_disasm.c */ int brw_disasm (FILE *file, struct brw_instruction *inst, int gen); -/* brw_state.c */ -void brw_enable(struct gl_context * ctx, GLenum cap, GLboolean state); -void brw_depth_range(struct gl_context *ctx, GLclampd nearval, GLclampd farval); +/* brw_vs.c */ +void brw_compute_vue_map(struct brw_vue_map *vue_map, + const struct intel_context *intel, + bool userclip_active, + GLbitfield64 outputs_written); +gl_clip_plane *brw_select_clip_planes(struct gl_context *ctx); + +/* brw_wm.c */ +unsigned +brw_compute_barycentric_interp_modes(bool shade_model_flat, + const struct 
gl_fragment_program *fprog); + +/* gen6_clip_state.c */ +bool +brw_fprog_uses_noperspective(const struct gl_fragment_program *fprog); + + /*====================================================================== * Inline conversion functions. These are better-typed than the @@ -810,7 +1054,7 @@ brw_fragment_program_const(const struct gl_fragment_program *p) } static inline -float convert_param(enum param_conversion conversion, float param) +float convert_param(enum param_conversion conversion, const float *param) { union { float f; @@ -820,25 +1064,57 @@ float convert_param(enum param_conversion conversion, float param) switch (conversion) { case PARAM_NO_CONVERT: - return param; + return *param; case PARAM_CONVERT_F2I: - fi.i = param; + fi.i = *param; return fi.f; case PARAM_CONVERT_F2U: - fi.u = param; + fi.u = *param; return fi.f; case PARAM_CONVERT_F2B: - if (param != 0.0) + if (*param != 0.0) fi.i = 1; else fi.i = 0; return fi.f; + case PARAM_CONVERT_ZERO: + return 0.0; default: - return param; + return *param; + } +} + +/** + * Pre-gen6, the register file of the EUs was shared between threads, + * and each thread used some subset allocated on a 16-register block + * granularity. The unit states wanted these block counts. + */ +static inline int +brw_register_blocks(int reg_count) +{ + return ALIGN(reg_count, 16) / 16 - 1; +} + +static inline uint32_t +brw_program_reloc(struct brw_context *brw, uint32_t state_offset, + uint32_t prog_offset) +{ + struct intel_context *intel = &brw->intel; + + if (intel->gen >= 5) { + /* Using state base address. */ + return prog_offset; } + + drm_intel_bo_emit_reloc(intel->batch.bo, + state_offset, + brw->cache.bo, + prog_offset, + I915_GEM_DOMAIN_INSTRUCTION, 0); + + return brw->cache.bo->offset + prog_offset; } -GLboolean brw_do_cubemap_normalize(struct exec_list *instructions); +bool brw_do_cubemap_normalize(struct exec_list *instructions); #endif -