struct brw_context;
-#define BRW_NEW_URB_FENCE 0x1
-#define BRW_NEW_FRAGMENT_PROGRAM 0x2
-#define BRW_NEW_VERTEX_PROGRAM 0x4
-#define BRW_NEW_INPUT_DIMENSIONS 0x8
-#define BRW_NEW_CURBE_OFFSETS 0x10
-#define BRW_NEW_REDUCED_PRIMITIVE 0x20
-#define BRW_NEW_PRIMITIVE 0x40
-#define BRW_NEW_CONTEXT 0x80
-#define BRW_NEW_WM_INPUT_DIMENSIONS 0x100
-#define BRW_NEW_PSP 0x800
-#define BRW_NEW_WM_SURFACES 0x1000
-#define BRW_NEW_BINDING_TABLE 0x2000
-#define BRW_NEW_INDICES 0x4000
-#define BRW_NEW_VERTICES 0x8000
+enum brw_state_id { /* Bit index of each BRW_NEW_* state flag: every
+                     * BRW_NEW_<x> macro is defined as (1 << BRW_STATE_<x>),
+                     * so the order of the enumerators fixes the bit layout. */
+ BRW_STATE_URB_FENCE,
+ BRW_STATE_FRAGMENT_PROGRAM,
+ BRW_STATE_VERTEX_PROGRAM,
+ BRW_STATE_INPUT_DIMENSIONS,
+ BRW_STATE_CURBE_OFFSETS,
+ BRW_STATE_REDUCED_PRIMITIVE,
+ BRW_STATE_PRIMITIVE,
+ BRW_STATE_CONTEXT,
+ BRW_STATE_WM_INPUT_DIMENSIONS,
+ BRW_STATE_PSP,
+ BRW_STATE_WM_SURFACES,
+ BRW_STATE_VS_BINDING_TABLE,
+ BRW_STATE_GS_BINDING_TABLE,
+ BRW_STATE_PS_BINDING_TABLE,
+ BRW_STATE_INDICES,
+ BRW_STATE_VERTICES,
+ BRW_STATE_BATCH,
+ BRW_STATE_NR_WM_SURFACES,
+ BRW_STATE_NR_VS_SURFACES,
+ BRW_STATE_INDEX_BUFFER,
+ BRW_STATE_VS_CONSTBUF,
+ BRW_STATE_WM_CONSTBUF,
+ BRW_STATE_PROGRAM_CACHE,
+ BRW_STATE_STATE_BASE_ADDRESS,
+};
+
+#define BRW_NEW_URB_FENCE (1 << BRW_STATE_URB_FENCE) /* BRW_NEW_<x> is bit BRW_STATE_<x>. NOTE(review): the shifted 1 is a signed int, so brw_state_id must not grow to an index that reaches the sign bit. */
+#define BRW_NEW_FRAGMENT_PROGRAM (1 << BRW_STATE_FRAGMENT_PROGRAM)
+#define BRW_NEW_VERTEX_PROGRAM (1 << BRW_STATE_VERTEX_PROGRAM)
+#define BRW_NEW_INPUT_DIMENSIONS (1 << BRW_STATE_INPUT_DIMENSIONS)
+#define BRW_NEW_CURBE_OFFSETS (1 << BRW_STATE_CURBE_OFFSETS)
+#define BRW_NEW_REDUCED_PRIMITIVE (1 << BRW_STATE_REDUCED_PRIMITIVE)
+#define BRW_NEW_PRIMITIVE (1 << BRW_STATE_PRIMITIVE)
+#define BRW_NEW_CONTEXT (1 << BRW_STATE_CONTEXT)
+#define BRW_NEW_WM_INPUT_DIMENSIONS (1 << BRW_STATE_WM_INPUT_DIMENSIONS)
+#define BRW_NEW_PSP (1 << BRW_STATE_PSP)
+#define BRW_NEW_WM_SURFACES (1 << BRW_STATE_WM_SURFACES)
+#define BRW_NEW_VS_BINDING_TABLE (1 << BRW_STATE_VS_BINDING_TABLE)
+#define BRW_NEW_GS_BINDING_TABLE (1 << BRW_STATE_GS_BINDING_TABLE)
+#define BRW_NEW_PS_BINDING_TABLE (1 << BRW_STATE_PS_BINDING_TABLE)
+#define BRW_NEW_INDICES (1 << BRW_STATE_INDICES)
+#define BRW_NEW_VERTICES (1 << BRW_STATE_VERTICES)
/**
* Used for any batch entry with a relocated pointer that will be used
* by any 3D rendering.
*/
-#define BRW_NEW_BATCH 0x10000
-/** brw->depth_region updated */
-#define BRW_NEW_DEPTH_BUFFER 0x20000
-#define BRW_NEW_NR_WM_SURFACES 0x40000
-#define BRW_NEW_NR_VS_SURFACES 0x80000
-#define BRW_NEW_INDEX_BUFFER 0x100000
-#define BRW_NEW_VS_CONSTBUF 0x200000
-#define BRW_NEW_WM_CONSTBUF 0x400000
+#define BRW_NEW_BATCH (1 << BRW_STATE_BATCH)
+/* Flags for surface counts, index/constant buffers, the program cache and state base address. */
+#define BRW_NEW_NR_WM_SURFACES (1 << BRW_STATE_NR_WM_SURFACES)
+#define BRW_NEW_NR_VS_SURFACES (1 << BRW_STATE_NR_VS_SURFACES)
+#define BRW_NEW_INDEX_BUFFER (1 << BRW_STATE_INDEX_BUFFER)
+#define BRW_NEW_VS_CONSTBUF (1 << BRW_STATE_VS_CONSTBUF)
+#define BRW_NEW_WM_CONSTBUF (1 << BRW_STATE_WM_CONSTBUF)
+#define BRW_NEW_PROGRAM_CACHE (1 << BRW_STATE_PROGRAM_CACHE)
+#define BRW_NEW_STATE_BASE_ADDRESS (1 << BRW_STATE_STATE_BASE_ADDRESS)
struct brw_state_flags {
/** State update flags signalled by mesa internals */
GLuint cache;
};
+enum state_struct_type { /* Tag identifying the kind of a state structure;
+                          * used as the `type` of state_batch_list entries.
+                          * NOTE(review): the values below 0x100 presumably
+                          * match codes used by the .aub trace format --
+                          * confirm before renumbering. */
+ AUB_TRACE_VS_STATE = 1,
+ AUB_TRACE_GS_STATE = 2,
+ AUB_TRACE_CLIP_STATE = 3,
+ AUB_TRACE_SF_STATE = 4,
+ AUB_TRACE_WM_STATE = 5,
+ AUB_TRACE_CC_STATE = 6,
+ AUB_TRACE_CLIP_VP_STATE = 7,
+ AUB_TRACE_SF_VP_STATE = 8,
+ AUB_TRACE_CC_VP_STATE = 0x9,
+ AUB_TRACE_SAMPLER_STATE = 0xa,
+ AUB_TRACE_KERNEL_INSTRUCTIONS = 0xb,
+ AUB_TRACE_SCRATCH_SPACE = 0xc,
+ AUB_TRACE_SAMPLER_DEFAULT_COLOR = 0xd,
+
+ AUB_TRACE_SCISSOR_STATE = 0x15,
+ AUB_TRACE_BLEND_STATE = 0x16,
+ AUB_TRACE_DEPTH_STENCIL_STATE = 0x17,
+
+ /* Not written to .aub files the same way the structures above are;
+  * these tags all start at 0x100.
+  */
+ AUB_TRACE_NO_TYPE = 0x100,
+ AUB_TRACE_BINDING_TABLE = 0x101,
+ AUB_TRACE_SURFACE_STATE = 0x102,
+ AUB_TRACE_VS_CONSTANTS = 0x103,
+ AUB_TRACE_WM_CONSTANTS = 0x104,
+};
/** Subclass of Mesa vertex program */
struct brw_vertex_program {
GLuint urb_read_length;
GLuint first_curbe_grf;
- GLuint total_grf;
+ GLuint first_curbe_grf_16;
+ GLuint reg_blocks;
+ GLuint reg_blocks_16;
GLuint total_scratch;
GLuint nr_params; /**< number of float params/constants */
GLuint nr_pull_params;
GLboolean error;
int dispatch_width;
+ uint32_t prog_offset_16;
/* Pointer to tracked values (only valid once
* _mesa_load_state_parameters has been called at runtime).
BRW_CC_VP,
BRW_CC_UNIT,
BRW_WM_PROG,
- BRW_SAMPLER_DEFAULT_COLOR,
BRW_SAMPLER,
BRW_WM_UNIT,
BRW_SF_PROG,
/** 32-bit hash of the key data */
GLuint hash;
GLuint key_size; /* for variable-sized keys */
+ GLuint aux_size;
const void *key;
- drm_intel_bo **reloc_bufs;
- GLuint nr_reloc_bufs;
- drm_intel_bo *bo;
+ uint32_t offset;
+ uint32_t size;
struct brw_cache_item *next;
};
struct brw_context *brw;
struct brw_cache_item **items;
+ drm_intel_bo *bo;
GLuint size, n_items;
- char *name[BRW_MAX_CACHE];
-
- /* Record of the last BOs chosen for each cache_id. Used to set
- * brw->state.dirty.cache when a new cache item is chosen.
- */
- drm_intel_bo *last_bo[BRW_MAX_CACHE];
+ uint32_t next_offset;
+ bool bo_used_by_gpu;
};
#define CACHE_NEW_CC_VP (1<<BRW_CC_VP)
#define CACHE_NEW_CC_UNIT (1<<BRW_CC_UNIT)
#define CACHE_NEW_WM_PROG (1<<BRW_WM_PROG)
-#define CACHE_NEW_SAMPLER_DEFAULT_COLOR (1<<BRW_SAMPLER_DEFAULT_COLOR)
#define CACHE_NEW_SAMPLER (1<<BRW_SAMPLER)
#define CACHE_NEW_WM_UNIT (1<<BRW_WM_UNIT)
#define CACHE_NEW_SF_PROG (1<<BRW_SF_PROG)
*/
#define ATTRIB_BIT_DWORDS ((VERT_ATTRIB_MAX+31)/32)
+struct brw_vertex_buffer { /* One uploaded vertex buffer; instances are stored in the vb.buffers[] array. */
+ /** Buffer object containing the uploaded vertex data */
+ drm_intel_bo *bo;
+ uint32_t offset; /* NOTE(review): presumably the byte offset of the vertex data within bo -- confirm */
+ /** Byte stride between elements in the uploaded array */
+ GLuint stride;
+};
struct brw_vertex_element {
const struct gl_client_array *glarray;
+ int buffer;
+
/** The corresponding Mesa vertex attribute */
gl_vert_attrib attrib;
/** Size of a complete element */
GLuint element_size;
- /** Number of uploaded elements for this input. */
- GLuint count;
- /** Byte stride between elements in the uploaded array */
- GLuint stride;
/** Offset of the first element within the buffer object */
unsigned int offset;
- /** Buffer object containing the uploaded vertex data */
- drm_intel_bo *bo;
};
GLboolean has_negative_rhw_bug;
GLboolean has_aa_line_parameters;
GLboolean has_pln;
-;
+
struct {
struct brw_state_flags dirty;
-
- struct intel_region *depth_region;
-
/**
* List of buffers accumulated in brw_validate_state to receive
* drm_intel_bo_check_aperture treatment before exec, so we can
* the CURBE, the depth buffer, and a query BO.
*/
drm_intel_bo *validated_bos[VERT_ATTRIB_MAX + BRW_WM_MAX_SURF + 16];
- int validated_bo_count;
+ unsigned int validated_bo_count;
} state;
struct brw_cache cache;
struct {
struct brw_vertex_element inputs[VERT_ATTRIB_MAX];
+ struct brw_vertex_buffer buffers[VERT_ATTRIB_MAX];
+ struct {
+ uint32_t handle;
+ uint32_t offset;
+ uint32_t stride;
+ } current_buffers[VERT_ATTRIB_MAX];
struct brw_vertex_element *enabled[VERT_ATTRIB_MAX];
GLuint nr_enabled;
+ GLuint nr_buffers, nr_current_buffers;
/* Summary of size and varying of active arrays, so we can check
* for changes to this state:
*/
struct brw_vertex_info info;
unsigned int min_index, max_index;
+
+ /* Offset from start of vertex buffer so we can avoid redefining
+ * the same VB packed over and over again.
+ */
+ unsigned int start_vertex_bias;
} vb;
struct {
*/
const struct _mesa_index_buffer *ib;
- /* Updates to these fields are signaled by BRW_NEW_INDEX_BUFFER. */
+ /* Updates are signaled by BRW_NEW_INDEX_BUFFER. */
drm_intel_bo *bo;
- unsigned int offset;
- unsigned int size;
+ GLuint type;
+
/* Offset to index buffer index to use in CMD_3D_PRIM so that we can
* avoid re-uploading the IB packet over and over if we're actually
* referencing the same index buffer.
const struct gl_vertex_program *vertex_program;
const struct gl_fragment_program *fragment_program;
-
- /* For populating the gtt:
- */
- GLuint next_free_page;
-
/* hw-dependent 3DSTATE_VF_STATISTICS opcode */
uint32_t CMD_VF_STATISTICS;
/* hw-dependent 3DSTATE_PIPELINE_SELECT opcode */
GLboolean constrained;
+ GLuint max_vs_entries; /* Maximum number of VS entries */
+ GLuint max_gs_entries; /* Maximum number of GS entries */
+
GLuint nr_vs_entries;
GLuint nr_gs_entries;
GLuint nr_clip_entries;
GLuint nr_sf_entries;
GLuint nr_cs_entries;
- /* gen6 */
+ /* gen6:
+ * The length of each URB entry owned by the VS (or GS), as
+ * a number of 1024-bit (128-byte) rows. Should be >= 1.
+ *
+ * gen7: Same meaning, but in 512-bit (64-byte) rows.
+ */
GLuint vs_size;
-/* GLuint gs_size; */
-/* GLuint clip_size; */
-/* GLuint sf_size; */
-/* GLuint cs_size; */
+ GLuint gs_size;
GLuint vs_start;
GLuint gs_start;
struct brw_vs_prog_data *prog_data;
int8_t *constant_map; /* variable array following prog_data */
- drm_intel_bo *prog_bo;
- drm_intel_bo *state_bo;
drm_intel_bo *const_bo;
+ /** Offset in the program cache to the VS program */
+ uint32_t prog_offset;
+ uint32_t state_offset;
/** Binding table of pointers to surf_bo entries */
- drm_intel_bo *bind_bo;
uint32_t bind_bo_offset;
- drm_intel_bo *surf_bo[BRW_VS_MAX_SURF];
uint32_t surf_offset[BRW_VS_MAX_SURF];
GLuint nr_surfaces;
+
+ uint32_t push_const_offset; /* Offset in the batchbuffer */
+ int push_const_size; /* in 256-bit register increments */
} vs;
struct {
struct brw_gs_prog_data *prog_data;
GLboolean prog_active;
- drm_intel_bo *prog_bo;
- drm_intel_bo *state_bo;
+ /** Offset in the program cache to the GS program */
+ uint32_t prog_offset;
+ uint32_t state_offset;
} gs;
struct {
struct brw_clip_prog_data *prog_data;
- drm_intel_bo *prog_bo;
- drm_intel_bo *state_bo;
- drm_intel_bo *vp_bo;
+ /** Offset in the program cache to the CLIP program pre-gen6 */
+ uint32_t prog_offset;
+
+ /* Offset in the batch to the CLIP state on pre-gen6. */
+ uint32_t state_offset;
+
+ /* As of gen6, this is the offset in the batch to the CLIP VP,
+ * instead of vp_bo.
+ */
+ uint32_t vp_offset;
} clip;
struct {
struct brw_sf_prog_data *prog_data;
- drm_intel_bo *prog_bo;
- drm_intel_bo *state_bo;
- drm_intel_bo *vp_bo;
+ /** Offset in the program cache to the SF program pre-gen6 */
+ uint32_t prog_offset;
+ uint32_t state_offset;
+ uint32_t vp_offset;
} sf;
struct {
*/
GLbitfield input_size_masks[4];
- /** Array of surface default colors (texture border color) */
- drm_intel_bo *sdc_bo[BRW_MAX_TEX_UNIT];
+ /** offsets in the batch to sampler default colors (texture border color)
+ */
+ uint32_t sdc_offset[BRW_MAX_TEX_UNIT];
GLuint render_surf;
GLuint nr_surfaces;
drm_intel_bo *scratch_bo;
GLuint sampler_count;
- drm_intel_bo *sampler_bo;
+ uint32_t sampler_offset;
+
+ /** Offset in the program cache to the WM program */
+ uint32_t prog_offset;
/** Binding table of pointers to surf_bo entries */
- drm_intel_bo *bind_bo;
uint32_t bind_bo_offset;
- drm_intel_bo *surf_bo[BRW_WM_MAX_SURF];
uint32_t surf_offset[BRW_WM_MAX_SURF];
+ uint32_t state_offset; /* offset in batchbuffer to pre-gen6 WM state */
- drm_intel_bo *prog_bo;
- drm_intel_bo *state_bo;
drm_intel_bo *const_bo; /* pull constant buffer. */
/**
- * This is the push constant BO on gen6.
+ * This is the offset in the batch to the push constants on gen6.
*
* Pre-gen6, push constants live in the CURBE.
*/
- drm_intel_bo *push_const_bo;
- } wm;
+ uint32_t push_const_offset;
+ /** @{ register allocator */
- struct {
- /* gen4 */
- drm_intel_bo *prog_bo;
- drm_intel_bo *vp_bo;
+ struct ra_regs *regs;
+
+ /** Array of the ra classes for the unaligned contiguous
+ * register block sizes used.
+ */
+ int *classes;
- /* gen6 */
- drm_intel_bo *blend_state_bo;
- drm_intel_bo *depth_stencil_state_bo;
- drm_intel_bo *color_calc_state_bo;
+ /**
+ * Mapping for register-allocated objects in *regs to the first
+ * GRF for that object.
+ */
+ uint8_t *ra_reg_to_grf;
- drm_intel_bo *state_bo;
+ /**
+ * ra class for the aligned pairs we use for PLN, which doesn't
+ * appear in *classes.
+ */
+ int aligned_pairs_class;
+
+ /** @} */
+ } wm;
+
+
+ struct {
uint32_t state_offset;
+ uint32_t blend_state_offset;
+ uint32_t depth_stencil_state_offset;
+ uint32_t vp_offset;
} cc;
struct {
/* Used to give every program string a unique id
*/
GLuint program_id;
+
+ int num_prepare_atoms, num_emit_atoms;
+ struct brw_tracked_state prepare_atoms[64], emit_atoms[64];
+
+ /* If (INTEL_DEBUG & DEBUG_BATCH) */
+ struct {
+ uint32_t offset;
+ uint32_t size;
+ enum state_struct_type type;
+ } *state_batch_list;
+ int state_batch_count;
};
*/
void brw_upload_urb_fence(struct brw_context *brw);
-/* brw_cc.c */
-void brw_update_cc_vp(struct brw_context *brw);
-
/* brw_curbe.c
*/
void brw_upload_cs_urb_state(struct brw_context *brw);
/* brw_disasm.c */
int brw_disasm (FILE *file, struct brw_instruction *inst, int gen);
-/* brw_state.c */
-void brw_enable(struct gl_context * ctx, GLenum cap, GLboolean state);
-void brw_depth_range(struct gl_context *ctx, GLclampd nearval, GLclampd farval);
-
/*======================================================================
* Inline conversion functions. These are better-typed than the
* macros used previously:
}
}
+/**
+ * Converts a register count into the block count that the pre-gen6 unit
+ * states want.
+ *
+ * Pre-gen6, the EU register file was shared between threads, and each
+ * thread was allocated a subset of it with a granularity of 16 registers.
+ *
+ * \param reg_count  register count to convert
+ * \return ALIGN(reg_count, 16) / 16 - 1, i.e. the number of 16-register
+ *         blocks minus one (assuming ALIGN, which is defined elsewhere,
+ *         rounds up to a multiple of its second argument -- TODO confirm)
+ */
+static inline int
+brw_register_blocks(int reg_count)
+{
+   const int regs_per_block = 16;
+
+   return ALIGN(reg_count, regs_per_block) / regs_per_block - 1;
+}
+
+/**
+ * Returns the value to store for a program pointer that lives at
+ * \p state_offset in the batch and refers to \p prog_offset in the
+ * program cache.
+ *
+ * On gen5 and later \p prog_offset is returned unchanged (state base
+ * address is in use).  On older hardware a relocation against
+ * brw->cache.bo is emitted into intel->batch.bo at \p state_offset, and
+ * the returned value is brw->cache.bo->offset plus \p prog_offset.
+ *
+ * \param brw           context supplying the batch and the program cache BO
+ * \param state_offset  offset in the batch of the entry being relocated
+ * \param prog_offset   offset of the program within the program cache
+ */
+static inline uint32_t
+brw_program_reloc(struct brw_context *brw, uint32_t state_offset,
+                  uint32_t prog_offset)
+{
+   struct intel_context *intel = &brw->intel;
+
+   if (intel->gen < 5) {
+      /* No state base address here: relocate against the cache BO. */
+      drm_intel_bo_emit_reloc(intel->batch.bo,
+                              state_offset,
+                              brw->cache.bo,
+                              prog_offset,
+                              I915_GEM_DOMAIN_INSTRUCTION, 0);
+
+      return brw->cache.bo->offset + prog_offset;
+   }
+
+   /* Using state base address. */
+   return prog_offset;
+}
+
GLboolean brw_do_cubemap_normalize(struct exec_list *instructions);
#endif
-