#include "brw_structs.h"
#include "main/imports.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
/* Glossary:
*
#define BRW_MAX_CURBE (32*16)
struct brw_context;
+struct brw_instruction;
+struct brw_vs_prog_key;
+struct brw_wm_prog_key;
+struct brw_wm_prog_data;
enum brw_state_id {
BRW_STATE_URB_FENCE,
BRW_STATE_CONTEXT,
BRW_STATE_WM_INPUT_DIMENSIONS,
BRW_STATE_PSP,
- BRW_STATE_WM_SURFACES,
+ BRW_STATE_SURFACES,
BRW_STATE_VS_BINDING_TABLE,
BRW_STATE_GS_BINDING_TABLE,
BRW_STATE_PS_BINDING_TABLE,
BRW_STATE_NR_VS_SURFACES,
BRW_STATE_INDEX_BUFFER,
BRW_STATE_VS_CONSTBUF,
- BRW_STATE_WM_CONSTBUF,
BRW_STATE_PROGRAM_CACHE,
BRW_STATE_STATE_BASE_ADDRESS,
+ BRW_STATE_SOL_INDICES,
};
#define BRW_NEW_URB_FENCE (1 << BRW_STATE_URB_FENCE)
#define BRW_NEW_CONTEXT (1 << BRW_STATE_CONTEXT)
#define BRW_NEW_WM_INPUT_DIMENSIONS (1 << BRW_STATE_WM_INPUT_DIMENSIONS)
#define BRW_NEW_PSP (1 << BRW_STATE_PSP)
-#define BRW_NEW_WM_SURFACES (1 << BRW_STATE_WM_SURFACES)
+#define BRW_NEW_SURFACES (1 << BRW_STATE_SURFACES)
#define BRW_NEW_VS_BINDING_TABLE (1 << BRW_STATE_VS_BINDING_TABLE)
#define BRW_NEW_GS_BINDING_TABLE (1 << BRW_STATE_GS_BINDING_TABLE)
#define BRW_NEW_PS_BINDING_TABLE (1 << BRW_STATE_PS_BINDING_TABLE)
*/
#define BRW_NEW_BATCH (1 << BRW_STATE_BATCH)
/** \see brw.state.depth_region */
-#define BRW_NEW_NR_WM_SURFACES (1 << BRW_STATE_NR_WM_SURFACES)
-#define BRW_NEW_NR_VS_SURFACES (1 << BRW_STATE_NR_VS_SURFACES)
#define BRW_NEW_INDEX_BUFFER (1 << BRW_STATE_INDEX_BUFFER)
#define BRW_NEW_VS_CONSTBUF (1 << BRW_STATE_VS_CONSTBUF)
-#define BRW_NEW_WM_CONSTBUF (1 << BRW_STATE_WM_CONSTBUF)
#define BRW_NEW_PROGRAM_CACHE (1 << BRW_STATE_PROGRAM_CACHE)
#define BRW_NEW_STATE_BASE_ADDRESS (1 << BRW_STATE_STATE_BASE_ADDRESS)
+#define BRW_NEW_SOL_INDICES (1 << BRW_STATE_SOL_INDICES)
struct brw_state_flags {
/** State update flags signalled by mesa internals */
GLuint cache;
};
+/* .aub trace annotation type codes.  Each value occupies bits 8..15 (see
+ * AUB_TRACE_TYPE_MASK); ENCODE_SS_TYPE() below moves the whole value into
+ * the top 16 bits of a state_struct_type and packs a subtype into the low
+ * 16 bits.
+ */
+#define AUB_TRACE_TYPE_MASK 0x0000ff00
+#define AUB_TRACE_TYPE_NOTYPE (0 << 8)
+#define AUB_TRACE_TYPE_BATCH (1 << 8)
+#define AUB_TRACE_TYPE_VERTEX_BUFFER (5 << 8)
+#define AUB_TRACE_TYPE_2D_MAP (6 << 8)
+#define AUB_TRACE_TYPE_CUBE_MAP (7 << 8)
+#define AUB_TRACE_TYPE_VOLUME_MAP (9 << 8)
+#define AUB_TRACE_TYPE_1D_MAP (10 << 8)
+#define AUB_TRACE_TYPE_CONSTANT_BUFFER (11 << 8)
+#define AUB_TRACE_TYPE_CONSTANT_URB (12 << 8)
+#define AUB_TRACE_TYPE_INDEX_BUFFER (13 << 8)
+#define AUB_TRACE_TYPE_GENERAL (14 << 8)
+#define AUB_TRACE_TYPE_SURFACE (15 << 8)
+
+/**
+ * state_struct_type enum values are encoded with the top 16 bits representing
+ * the type to be delivered to the .aub file, and the bottom 16 bits
+ * representing the subtype. This macro performs the encoding.
+ */
+#define ENCODE_SS_TYPE(type, subtype) (((type) << 16) | (subtype))
+
+
/* Labels attached to driver-uploaded state buffers so that .aub dump
 * annotation (see brw_debug_batch()/brw_annotate_aub() below) can identify
 * them.  Values are built with ENCODE_SS_TYPE() above: type in the top 16
 * bits, subtype in the low 16 bits.
 */
enum state_struct_type {
- AUB_TRACE_VS_STATE = 1,
- AUB_TRACE_GS_STATE = 2,
- AUB_TRACE_CLIP_STATE = 3,
- AUB_TRACE_SF_STATE = 4,
- AUB_TRACE_WM_STATE = 5,
- AUB_TRACE_CC_STATE = 6,
- AUB_TRACE_CLIP_VP_STATE = 7,
- AUB_TRACE_SF_VP_STATE = 8,
- AUB_TRACE_CC_VP_STATE = 0x9,
- AUB_TRACE_SAMPLER_STATE = 0xa,
- AUB_TRACE_KERNEL_INSTRUCTIONS = 0xb,
- AUB_TRACE_SCRATCH_SPACE = 0xc,
- AUB_TRACE_SAMPLER_DEFAULT_COLOR = 0xd,
-
- AUB_TRACE_SCISSOR_STATE = 0x15,
- AUB_TRACE_BLEND_STATE = 0x16,
- AUB_TRACE_DEPTH_STENCIL_STATE = 0x17,
-
- /* Not written to .aub files the same way the structures above are. */
- AUB_TRACE_NO_TYPE = 0x100,
- AUB_TRACE_BINDING_TABLE = 0x101,
- AUB_TRACE_SURFACE_STATE = 0x102,
- AUB_TRACE_VS_CONSTANTS = 0x103,
- AUB_TRACE_WM_CONSTANTS = 0x104,
+ AUB_TRACE_VS_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 1),
+ AUB_TRACE_GS_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 2),
+ AUB_TRACE_CLIP_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 3),
+ AUB_TRACE_SF_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 4),
+ AUB_TRACE_WM_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 5),
+ AUB_TRACE_CC_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 6),
+ AUB_TRACE_CLIP_VP_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 7),
+ AUB_TRACE_SF_VP_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 8),
+ AUB_TRACE_CC_VP_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 0x9),
+ AUB_TRACE_SAMPLER_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 0xa),
+ AUB_TRACE_KERNEL_INSTRUCTIONS = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 0xb),
+ AUB_TRACE_SCRATCH_SPACE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 0xc),
+ AUB_TRACE_SAMPLER_DEFAULT_COLOR = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 0xd),
+
+ AUB_TRACE_SCISSOR_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 0x15),
+ AUB_TRACE_BLEND_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 0x16),
+ AUB_TRACE_DEPTH_STENCIL_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 0x17),
+
+ /* These use dedicated type codes rather than AUB_TRACE_TYPE_GENERAL, so
+  * the .aub file sees them as distinct buffer kinds.
+  */
+ AUB_TRACE_VERTEX_BUFFER = ENCODE_SS_TYPE(AUB_TRACE_TYPE_VERTEX_BUFFER, 0),
+ AUB_TRACE_BINDING_TABLE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_SURFACE, 0x100),
+ AUB_TRACE_SURFACE_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_SURFACE, 0x200),
+ AUB_TRACE_VS_CONSTANTS = ENCODE_SS_TYPE(AUB_TRACE_TYPE_CONSTANT_BUFFER, 0),
+ AUB_TRACE_WM_CONSTANTS = ENCODE_SS_TYPE(AUB_TRACE_TYPE_CONSTANT_BUFFER, 1),
};
+/**
+ * Decode a state_struct_type value to determine the type that should be
+ * stored in the .aub file.
+ */
+static inline uint32_t AUB_TRACE_TYPE(enum state_struct_type ss_type)
+{
+   /* Inverse of ENCODE_SS_TYPE(): the type code lives in the top 16 bits. */
+   return (ss_type & 0xFFFF0000) >> 16;
+}
+
+/**
+ * Decode a state_struct_type value to determine the subtype that should be
+ * stored in the .aub file.
+ */
+static inline uint32_t AUB_TRACE_SUBTYPE(enum state_struct_type ss_type)
+{
+   /* Inverse of ENCODE_SS_TYPE(): the subtype lives in the low 16 bits. */
+   return ss_type & 0xFFFF;
+}
+
/** Subclass of Mesa vertex program */
struct brw_vertex_program {
struct gl_vertex_program program;
/* Serial number identifying this program — presumably a compiled-program
 * cache key like brw_fragment_program::id; TODO confirm. */
GLuint id;
- GLboolean use_const_buffer;
+ bool use_const_buffer;
};
/** Subclass of Mesa fragment program */
struct brw_fragment_program {
struct gl_fragment_program program;
GLuint id; /**< serial no. to identify frag progs, never re-used */
-
-/** for debugging, which texture units are referenced */
-GLbitfield tex_units_used;
};
/** Driver subclass of gl_shader */
struct brw_shader {
struct gl_shader base;
+ /* NOTE(review): appears to record that this shader has already been
+  * compiled at least once — confirm against the users of this flag. */
+ bool compiled_once;
+
/** Shader IR transformed for native compile, at link time. */
struct exec_list *ir;
};
struct gl_shader_program base;
};
-enum param_conversion {
- PARAM_NO_CONVERT,
- PARAM_CONVERT_F2I,
- PARAM_CONVERT_F2U,
- PARAM_CONVERT_F2B,
- PARAM_CONVERT_ZERO,
-};
-
/* Data about a particular attempt to compile a program. Note that
* there can be many of these, each in a different GL state
* corresponding to a different brw_wm_prog_key struct, with different
- * compiled programs:
+ * compiled programs.
+ *
+ * Note: brw_wm_prog_data_compare() must be updated when adding fields to this
+ * struct!
*/
struct brw_wm_prog_data {
/* NOTE(review): presumably the number of CURBE register blocks read by the
 * program — confirm units against the CURBE upload code. */
GLuint curb_read_length;
GLuint nr_params; /**< number of float params/constants */
GLuint nr_pull_params;
- GLboolean error;
+ bool error;
+ bool dual_src_blend;
/* SIMD width the program was compiled for (8 or 16, per the 16-wide
 * prog_offset_16 field below — TODO confirm). */
int dispatch_width;
uint32_t prog_offset_16;
- /* Pointer to tracked values (only valid once
+ /**
+ * Mask of which interpolation modes are required by the fragment shader.
+ * Used in hardware setup on gen6+.
+ */
+ uint32_t barycentric_interp_modes;
+
+ /* Pointers to tracked values (only valid once
* _mesa_load_state_parameters has been called at runtime).
+ *
+ * These must be the last fields of the struct (see
+ * brw_wm_prog_data_compare()).
*/
- const float *param[MAX_UNIFORMS * 4]; /* should be: BRW_MAX_CURBE */
- enum param_conversion param_convert[MAX_UNIFORMS * 4];
- const float *pull_param[MAX_UNIFORMS * 4];
- enum param_conversion pull_param_convert[MAX_UNIFORMS * 4];
+ const float **param;
+ const float **pull_param;
};
+/**
+ * Enum representing the i965-specific vertex results that don't correspond
+ * exactly to any element of gl_vert_result. The values of this enum are
+ * assigned such that they don't conflict with gl_vert_result.
+ */
+typedef enum
+{
+ BRW_VERT_RESULT_NDC = VERT_RESULT_MAX,
+ BRW_VERT_RESULT_HPOS_DUPLICATE,
+ BRW_VERT_RESULT_PAD,
+ /*
+ * This is not a real vert_result, just a marker to make the SF program
+ * aware that it needs special handling for the gl_PointCoord built-in
+ * variable. See compile_sf_prog() for more info.
+ */
+ BRW_VERT_RESULT_PNTC,
+ BRW_VERT_RESULT_MAX
+} brw_vert_result;
+
+
+/**
+ * Data structure recording the relationship between the gl_vert_result enum
+ * and "slots" within the vertex URB entry (VUE). A "slot" is defined as a
+ * single octaword within the VUE (128 bits).
+ *
+ * Note that each BRW register contains 256 bits (2 octawords), so when
+ * accessing the VUE in URB_NOSWIZZLE mode, each register corresponds to two
+ * consecutive VUE slots. When accessing the VUE in URB_INTERLEAVED mode (as
+ * in a vertex shader), each register corresponds to a single VUE slot, since
+ * it contains data for two separate vertices.
+ */
+struct brw_vue_map {
+ /**
+ * Map from gl_vert_result value to VUE slot. For gl_vert_results that are
+ * not stored in a slot (because they are not written, or because
+ * additional processing is applied before storing them in the VUE), the
+ * value is -1.
+ */
+ int vert_result_to_slot[BRW_VERT_RESULT_MAX];
+
+ /**
+ * Map from VUE slot to gl_vert_result value. For slots that do not
+ * directly correspond to a gl_vert_result, the value comes from
+ * brw_vert_result.
+ *
+ * For slots that are not in use, the value is BRW_VERT_RESULT_MAX (this
+ * simplifies code that uses the value stored in slot_to_vert_result to
+ * create a bit mask).
+ */
+ int slot_to_vert_result[BRW_VERT_RESULT_MAX];
+
+ /**
+ * Total number of VUE slots in use (at most BRW_VERT_RESULT_MAX, since
+ * the arrays above are sized by it).
+ */
+ int num_slots;
+};
+
+/**
+ * Convert a VUE slot number into a byte offset within the VUE.
+ */
+static inline GLuint brw_vue_slot_to_offset(GLuint slot)
+{
+   /* Each slot is one octaword: 128 bits = 16 bytes (see brw_vue_map). */
+   return 16*slot;
+}
+
+/**
+ * Convert a vert_result into a byte offset within the VUE.
+ *
+ * NOTE(review): assumes vue_map->vert_result_to_slot[vert_result] is valid;
+ * for an unstored vert_result that entry is -1 (see brw_vue_map), which
+ * would yield a bogus offset — callers must check first.
+ */
+static inline GLuint brw_vert_result_to_offset(struct brw_vue_map *vue_map,
+ GLuint vert_result)
+{
+ return brw_vue_slot_to_offset(vue_map->vert_result_to_slot[vert_result]);
+}
+
+
struct brw_sf_prog_data {
GLuint urb_read_length;
GLuint total_grf;
/* Data produced by a geometry-shader program compile. */
struct brw_gs_prog_data {
GLuint urb_read_length;
GLuint total_grf;
+
+ /**
+ * Gen6 transform feedback: Amount by which the streaming vertex buffer
+ * indices should be incremented each time the GS is invoked.
+ */
+ unsigned svbi_postincrement_value;
};
+/* Note: brw_vs_prog_data_compare() must be updated when adding fields to this
+ * struct!
+ */
struct brw_vs_prog_data {
+ /* Layout of the VUE produced by this VS; see struct brw_vue_map above. */
+ struct brw_vue_map vue_map;
+
GLuint curb_read_length;
GLuint urb_read_length;
GLuint total_grf;
GLbitfield64 outputs_written;
GLuint nr_params; /**< number of float params/constants */
+ GLuint nr_pull_params; /**< number of dwords referenced by pull_param[] */
/* NOTE(review): presumably per-thread scratch space in bytes — confirm. */
GLuint total_scratch;
- GLuint inputs_read;
+ GLbitfield64 inputs_read;
/* Used for calculating urb partitions:
*/
GLuint urb_entry_size;
- const float *param[MAX_UNIFORMS * 4]; /* should be: BRW_MAX_CURBE */
- enum param_conversion param_convert[MAX_UNIFORMS * 4];
- const float *pull_param[MAX_UNIFORMS * 4];
- enum param_conversion pull_param_convert[MAX_UNIFORMS * 4];
-
bool uses_new_param_layout;
+ bool uses_vertexid;
+ bool userclip;
+
+ int num_surfaces;
+
+ /* These pointers must appear last. See brw_vs_prog_data_compare(). */
+ const float **param;
+ const float **pull_param;
};
#define BRW_MAX_DRAW_BUFFERS 8
/**
- * Size of our surface binding table for the WM.
- * This contains pointers to the drawing surfaces and current texture
- * objects and shader constant buffers (+2).
+ * Max number of binding table entries used for stream output.
+ *
+ * From the OpenGL 3.0 spec, table 6.44 (Transform Feedback State), the
+ * minimum value of MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS is 64.
+ *
+ * On Gen6, the size of transform feedback data is limited not by the number
+ * of components but by the number of binding table entries we set aside. We
+ * use one binding table entry for a float, one entry for a vector, and one
+ * entry per matrix column. Since the only way we can communicate our
+ * transform feedback capabilities to the client is via
+ * MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS, we need to plan for the
+ * worst case, in which all the varyings are floats, so we use up one binding
+ * table entry per component. Therefore we need to set aside at least 64
+ * binding table entries for use by transform feedback.
+ *
+ * Note: since we don't currently pack varyings, it is currently impossible
+ * for the client to actually use up all of these binding table entries--if
+ * all of their varyings were floats, they would run out of varying slots and
+ * fail to link. But that's a bug, so it seems prudent to go ahead and
+ * allocate the number of binding table entries we will need once the bug is
+ * fixed.
*/
-#define BRW_WM_MAX_SURF (BRW_MAX_DRAW_BUFFERS + BRW_MAX_TEX_UNIT + 1)
+#define BRW_MAX_SOL_BINDINGS 64
+
+/** Maximum number of actual buffers used for stream output */
+#define BRW_MAX_SOL_BUFFERS 4
+
+#define BRW_MAX_WM_UBOS 12
+#define BRW_MAX_VS_UBOS 12
/**
- * Helpers to convert drawing buffers, textures and constant buffers
- * to surface binding table indexes, for WM.
+ * Helpers to create Surface Binding Table indexes for draw buffers,
+ * textures, and constant buffers.
+ *
+ * Shader threads access surfaces via numeric handles, rather than directly
+ * using pointers. The binding table maps these numeric handles to the
+ * address of the actual buffer.
+ *
+ * For example, a shader might ask to sample from "surface 7." In this case,
+ * bind[7] would contain a pointer to a texture.
+ *
+ * Currently, our WM binding tables are (arbitrarily) programmed as follows
+ * (NOTE(review): the indices in this diagram look stale relative to the
+ * macros below — with SURF_INDEX_FRAG_CONST_BUFFER at
+ * BRW_MAX_DRAW_BUFFERS + 1 the pull constant buffer lands at index 9 and
+ * textures start at 10, not 8 and 9 as drawn; confirm and update):
+ *
+ * +-------------------------------+
+ * | 0 | Draw buffer 0 |
+ * | . | . |
+ * | : | : |
+ * | 7 | Draw buffer 7 |
+ * |-----|-------------------------|
+ * | 8 | WM Pull Constant Buffer |
+ * |-----|-------------------------|
+ * | 9 | Texture 0 |
+ * | . | . |
+ * | : | : |
+ * | 24 | Texture 15 |
+ * |-----|-------------------------|
+ * | 25 | UBO 0 |
+ * | . | . |
+ * | : | : |
+ * | 36 | UBO 11 |
+ * +-------------------------------+
+ *
+ * Our VS binding tables are programmed as follows:
+ *
+ * +-----+-------------------------+
+ * | 0 | VS Pull Constant Buffer |
+ * +-----+-------------------------+
+ * | 1 | Texture 0 |
+ * | . | . |
+ * | : | : |
+ * | 16 | Texture 15 |
+ * +-----+-------------------------+
+ * | 17 | UBO 0 |
+ * | . | . |
+ * | : | : |
+ * | 28 | UBO 11 |
+ * +-------------------------------+
+ *
+ * Our (gen6) GS binding tables are programmed as follows:
+ *
+ * +-----+-------------------------+
+ * | 0 | SOL Binding 0 |
+ * | . | . |
+ * | : | : |
+ * | 63 | SOL Binding 63 |
+ * +-----+-------------------------+
+ *
+ * Note that nothing actually uses the SURF_INDEX_DRAW macro, so it has to be
+ * the identity function or things will break. We do want to keep draw buffers
+ * first so we can use headerless render target writes for RT 0.
*/
/* WM binding-table layout.  Draw buffers stay first (see comment above). */
#define SURF_INDEX_DRAW(d) (d)
-#define SURF_INDEX_FRAG_CONST_BUFFER (BRW_MAX_DRAW_BUFFERS)
-#define SURF_INDEX_TEXTURE(t) (BRW_MAX_DRAW_BUFFERS + 1 + (t))
+#define SURF_INDEX_FRAG_CONST_BUFFER (BRW_MAX_DRAW_BUFFERS + 1)
+#define SURF_INDEX_TEXTURE(t) (BRW_MAX_DRAW_BUFFERS + 2 + (t))
+/* Note: the macro argument is parenthesized so that expression arguments
+ * (e.g. SURF_INDEX_WM_UBO(a + b)) expand correctly. */
+#define SURF_INDEX_WM_UBO(u) (SURF_INDEX_TEXTURE(BRW_MAX_TEX_UNIT) + (u))
-/**
- * Size of surface binding table for the VS.
- * Only one constant buffer for now.
- */
-#define BRW_VS_MAX_SURF 1
+/** Maximum size of the binding table. */
+#define BRW_MAX_WM_SURFACES SURF_INDEX_WM_UBO(BRW_MAX_WM_UBOS)
-/**
- * Only a VS constant buffer
- */
-#define SURF_INDEX_VERT_CONST_BUFFER 0
+/* VS binding-table layout: pull constants, then textures, then UBOs. */
+#define SURF_INDEX_VERT_CONST_BUFFER (0)
+#define SURF_INDEX_VS_TEXTURE(t) (SURF_INDEX_VERT_CONST_BUFFER + 1 + (t))
+#define SURF_INDEX_VS_UBO(u) (SURF_INDEX_VS_TEXTURE(BRW_MAX_TEX_UNIT) + (u))
+#define BRW_MAX_VS_SURFACES SURF_INDEX_VS_UBO(BRW_MAX_VS_UBOS)
+/* Gen6 GS binding table holds only transform-feedback (SOL) bindings. */
+#define SURF_INDEX_SOL_BINDING(t) ((t))
+#define BRW_MAX_GS_SURFACES SURF_INDEX_SOL_BINDING(BRW_MAX_SOL_BINDINGS)
enum brw_cache_id {
BRW_BLEND_STATE,
BRW_CC_VP,
BRW_CC_UNIT,
BRW_WM_PROG,
+ BRW_BLORP_BLIT_PROG,
BRW_SAMPLER,
BRW_WM_UNIT,
BRW_SF_PROG,
};
+/* Callback types used by struct brw_cache (below) to compare and free the
+ * per-program aux data attached to a prog_data entry. */
+typedef bool (*cache_aux_compare_func)(const void *a, const void *b,
+ int aux_size, const void *key);
+typedef void (*cache_aux_free_func)(const void *aux);
struct brw_cache {
struct brw_context *brw;
uint32_t next_offset;
bool bo_used_by_gpu;
+
+ /**
+ * Optional functions used in determining whether the prog_data for a new
+ * cache item matches an existing cache item (in case there's relevant data
+ * outside of the prog_data). If NULL, a plain memcmp is done.
+ */
+ cache_aux_compare_func aux_compare[BRW_MAX_CACHE];
+ /** Optional functions for freeing other pointers attached to a prog_data. */
+ cache_aux_free_func aux_free[BRW_MAX_CACHE];
};
*/
/* A state atom: emit() is run when any of the bits in dirty are flagged.
 * (The separate prepare() phase has been removed.) */
struct brw_tracked_state {
struct brw_state_flags dirty;
- void (*prepare)( struct brw_context *brw );
void (*emit)( struct brw_context *brw );
};
uint32_t offset;
/** Byte stride between elements in the uploaded array */
GLuint stride;
+ GLuint step_rate;
};
struct brw_vertex_element {
const struct gl_client_array *glarray;
struct brw_context
{
struct intel_context intel; /**< base class, must be first field */
- GLuint primitive;
+ GLuint primitive; /**< Hardware primitive, such as _3DPRIM_TRILIST. */
- GLboolean emit_state_always;
- GLboolean has_surface_tile_offset;
- GLboolean has_compr4;
- GLboolean has_negative_rhw_bug;
- GLboolean has_aa_line_parameters;
- GLboolean has_pln;
+ bool emit_state_always;
+ bool has_surface_tile_offset;
+ bool has_compr4;
+ bool has_negative_rhw_bug;
+ bool has_aa_line_parameters;
+ bool has_pln;
+ bool precompile;
+
+ /**
+ * Some versions of Gen hardware don't do centroid interpolation correctly
+ * on unlit pixels, causing incorrect values for derivatives near triangle
+ * edges. Enabling this flag causes the fragment shader to use
+ * non-centroid interpolation for unlit pixels, at the expense of two extra
+ * fragment shader instructions.
+ */
+ bool needs_unlit_centroid_workaround;
struct {
struct brw_state_flags dirty;
- /**
- * List of buffers accumulated in brw_validate_state to receive
- * drm_intel_bo_check_aperture treatment before exec, so we can
- * know if we should flush the batch and try again before
- * emitting primitives.
- *
- * This can be a fixed number as we only have a limited number of
- * objects referenced from the batchbuffer in a primitive emit,
- * consisting of the vertex buffers, pipelined state pointers,
- * the CURBE, the depth buffer, and a query BO.
- */
- drm_intel_bo *validated_bos[VERT_ATTRIB_MAX + BRW_WM_MAX_SURF + 16];
- unsigned int validated_bo_count;
} state;
struct brw_cache cache;
uint32_t handle;
uint32_t offset;
uint32_t stride;
+ uint32_t step_rate;
} current_buffers[VERT_ATTRIB_MAX];
struct brw_vertex_element *enabled[VERT_ATTRIB_MAX];
uint32_t CMD_VF_STATISTICS;
/* hw-dependent 3DSTATE_PIPELINE_SELECT opcode */
uint32_t CMD_PIPELINE_SELECT;
- int vs_max_threads;
- int wm_max_threads;
+
+ /**
+ * Platform specific constants containing the maximum number of threads
+ * for each pipeline stage.
+ */
+ int max_vs_threads;
+ int max_gs_threads;
+ int max_wm_threads;
/* BRW_NEW_URB_ALLOCATIONS:
*/
GLuint csize; /* constant buffer size in urb registers */
GLuint sfsize; /* setup data size in urb registers */
- GLboolean constrained;
+ bool constrained;
GLuint max_vs_entries; /* Maximum number of VS entries */
GLuint max_gs_entries; /* Maximum number of GS entries */
GLuint sf_start;
GLuint cs_start;
GLuint size; /* Hardware URB size, in KB. */
+
+ /* gen6: True if the most recently sent _3DSTATE_URB message allocated
+ * URB space for the GS.
+ */
+ bool gen6_gs_previously_active;
} urb;
GLuint last_bufsz;
} curbe;
+ /** SAMPLER_STATE count and offset */
+ struct {
+ GLuint count;
+ uint32_t offset;
+ } sampler;
+
struct {
struct brw_vs_prog_data *prog_data;
int8_t *constant_map; /* variable array following prog_data */
uint32_t prog_offset;
uint32_t state_offset;
- /** Binding table of pointers to surf_bo entries */
- uint32_t bind_bo_offset;
- uint32_t surf_offset[BRW_VS_MAX_SURF];
- GLuint nr_surfaces;
-
uint32_t push_const_offset; /* Offset in the batchbuffer */
int push_const_size; /* in 256-bit register increments */
*/
uint8_t *ra_reg_to_grf;
/** @} */
+
+ uint32_t bind_bo_offset;
+ uint32_t surf_offset[BRW_MAX_VS_SURFACES];
} vs;
struct {
struct brw_gs_prog_data *prog_data;
- GLboolean prog_active;
+ bool prog_active;
/** Offset in the program cache to the CLIP program pre-gen6 */
uint32_t prog_offset;
uint32_t state_offset;
+
+ uint32_t bind_bo_offset;
+ uint32_t surf_offset[BRW_MAX_GS_SURFACES];
} gs;
struct {
uint32_t sdc_offset[BRW_MAX_TEX_UNIT];
GLuint render_surf;
- GLuint nr_surfaces;
drm_intel_bo *scratch_bo;
- GLuint sampler_count;
- uint32_t sampler_offset;
+ /**
+ * Buffer object used in place of multisampled null render targets on
+ * Gen6. See brw_update_null_renderbuffer_surface().
+ */
+ drm_intel_bo *multisampled_null_render_target_bo;
/** Offset in the program cache to the WM program */
uint32_t prog_offset;
- /** Binding table of pointers to surf_bo entries */
- uint32_t bind_bo_offset;
- uint32_t surf_offset[BRW_WM_MAX_SURF];
uint32_t state_offset; /* offset in batchbuffer to pre-gen6 WM state */
drm_intel_bo *const_bo; /* pull constant buffer. */
*/
uint32_t push_const_offset;
+ /** Binding table of pointers to surf_bo entries */
+ uint32_t bind_bo_offset;
+ uint32_t surf_offset[BRW_MAX_WM_SURFACES];
+
/** @{ register allocator */
struct ra_regs *regs;
struct brw_query_object *obj;
drm_intel_bo *bo;
int index;
- GLboolean active;
+ bool active;
} query;
/* Used to give every program string a unique id
*/
GLuint program_id;
- int num_prepare_atoms, num_emit_atoms;
- struct brw_tracked_state prepare_atoms[64], emit_atoms[64];
+ int num_atoms;
+ const struct brw_tracked_state **atoms;
/* If (INTEL_DEBUG & DEBUG_BATCH) */
struct {
enum state_struct_type type;
} *state_batch_list;
int state_batch_count;
+
+ struct brw_sol_state {
+ uint32_t svbi_0_starting_index;
+ uint32_t svbi_0_max_index;
+ uint32_t offset_0_batch_start;
+ uint32_t primitives_generated;
+ uint32_t primitives_written;
+ bool counting_primitives_generated;
+ bool counting_primitives_written;
+ } sol;
+
+ uint32_t render_target_format[MESA_FORMAT_COUNT];
+ bool format_supported_as_render_target[MESA_FORMAT_COUNT];
+
+ /* PrimitiveRestart */
+ struct {
+ bool in_progress;
+ bool enable_cut_index;
+ } prim_restart;
+
+ uint32_t num_instances;
};
+
#define BRW_PACKCOLOR8888(r,g,b,a) ((r<<24) | (g<<16) | (b<<8) | a)
/* Per-opcode metadata; indexed by opcode in brw_opcodes[128] below. */
struct brw_instruction_info {
char *name;
int nsrc;
int ndst;
- GLboolean is_arith;
+ bool is_arith;
};
extern const struct brw_instruction_info brw_opcodes[128];
/*======================================================================
* brw_context.c
*/
-GLboolean brwCreateContext( int api,
- const struct gl_config *mesaVis,
- __DRIcontext *driContextPriv,
- void *sharedContextPrivate);
+bool brwCreateContext(int api,
+ const struct gl_config *mesaVis,
+ __DRIcontext *driContextPriv,
+ unsigned major_version,
+ unsigned minor_version,
+ uint32_t flags,
+ unsigned *error,
+ void *sharedContextPrivate);
+
+/*======================================================================
+ * brw_misc_state.c
+ */
+void brw_get_depthstencil_tile_masks(struct intel_mipmap_tree *depth_mt,
+ struct intel_mipmap_tree *stencil_mt,
+ uint32_t *out_tile_mask_x,
+ uint32_t *out_tile_mask_y);
+void brw_workaround_depthstencil_alignment(struct brw_context *brw);
/*======================================================================
* brw_queryobj.c
* brw_state_dump.c
*/
void brw_debug_batch(struct intel_context *intel);
+void brw_annotate_aub(struct intel_context *intel);
/*======================================================================
* brw_tex.c
/* brw_disasm.c */
int brw_disasm (FILE *file, struct brw_instruction *inst, int gen);
+/* brw_vs.c */
+gl_clip_plane *brw_select_clip_planes(struct gl_context *ctx);
+
+/* brw_wm_surface_state.c */
+void brw_init_surface_formats(struct brw_context *brw);
+void
+brw_update_sol_surface(struct brw_context *brw,
+ struct gl_buffer_object *buffer_obj,
+ uint32_t *out_offset, unsigned num_vector_components,
+ unsigned stride_dwords, unsigned offset_dwords);
+void brw_upload_ubo_surfaces(struct brw_context *brw,
+ struct gl_shader *shader,
+ uint32_t *surf_offsets);
+
+/* gen6_sol.c */
+void
+brw_begin_transform_feedback(struct gl_context *ctx, GLenum mode,
+ struct gl_transform_feedback_object *obj);
+void
+brw_end_transform_feedback(struct gl_context *ctx,
+ struct gl_transform_feedback_object *obj);
+
+/* gen7_sol_state.c */
+void
+gen7_end_transform_feedback(struct gl_context *ctx,
+ struct gl_transform_feedback_object *obj);
+
+/* brw_blorp_blit.cpp */
+GLbitfield
+brw_blorp_framebuffer(struct intel_context *intel,
+ GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1,
+ GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1,
+ GLbitfield mask, GLenum filter);
+
+/* gen6_multisample_state.c */
+void
+gen6_emit_3dstate_multisample(struct brw_context *brw,
+ unsigned num_samples);
+void
+gen6_emit_3dstate_sample_mask(struct brw_context *brw,
+ unsigned num_samples, float coverage,
+ bool coverage_invert);
+
+/* gen7_urb.c */
+void
+gen7_allocate_push_constants(struct brw_context *brw);
+
+void
+gen7_emit_urb_state(struct brw_context *brw, GLuint nr_vs_entries,
+ GLuint vs_size, GLuint vs_start);
+
+
+
/*======================================================================
* Inline conversion functions. These are better-typed than the
* macros used previously:
return (const struct brw_fragment_program *) p;
}
-static inline
-float convert_param(enum param_conversion conversion, const float *param)
-{
- union {
- float f;
- uint32_t u;
- int32_t i;
- } fi;
-
- switch (conversion) {
- case PARAM_NO_CONVERT:
- return *param;
- case PARAM_CONVERT_F2I:
- fi.i = *param;
- return fi.f;
- case PARAM_CONVERT_F2U:
- fi.u = *param;
- return fi.f;
- case PARAM_CONVERT_F2B:
- if (*param != 0.0)
- fi.i = 1;
- else
- fi.i = 0;
- return fi.f;
- case PARAM_CONVERT_ZERO:
- return 0.0;
- default:
- return *param;
- }
-}
-
/**
* Pre-gen6, the register file of the EUs was shared between threads,
* and each thread used some subset allocated on a 16-register block
return brw->cache.bo->offset + prog_offset;
}
-GLboolean brw_do_cubemap_normalize(struct exec_list *instructions);
+bool brw_do_cubemap_normalize(struct exec_list *instructions);
+bool brw_lower_texture_gradients(struct exec_list *instructions);
+
+/* Per-opcode name and source/destination operand counts; indexed by opcode
+ * in opcode_descs[128]. */
+struct opcode_desc {
+   char *name;
+   int nsrc;
+   int ndst;
+};
+
+extern const struct opcode_desc opcode_descs[128];
+
+#ifdef __cplusplus
+}
+#endif
#endif