BRW_STATE_WM_INPUT_DIMENSIONS,
BRW_STATE_PSP,
BRW_STATE_WM_SURFACES,
- BRW_STATE_BINDING_TABLE,
+ BRW_STATE_VS_BINDING_TABLE,
+ BRW_STATE_GS_BINDING_TABLE,
+ BRW_STATE_PS_BINDING_TABLE,
BRW_STATE_INDICES,
BRW_STATE_VERTICES,
BRW_STATE_BATCH,
- BRW_STATE_DEPTH_BUFFER,
BRW_STATE_NR_WM_SURFACES,
BRW_STATE_NR_VS_SURFACES,
BRW_STATE_INDEX_BUFFER,
BRW_STATE_VS_CONSTBUF,
- BRW_STATE_WM_CONSTBUF
+ BRW_STATE_WM_CONSTBUF,
+ BRW_STATE_PROGRAM_CACHE,
+ BRW_STATE_STATE_BASE_ADDRESS,
};
#define BRW_NEW_URB_FENCE (1 << BRW_STATE_URB_FENCE)
#define BRW_NEW_WM_INPUT_DIMENSIONS (1 << BRW_STATE_WM_INPUT_DIMENSIONS)
#define BRW_NEW_PSP (1 << BRW_STATE_PSP)
#define BRW_NEW_WM_SURFACES (1 << BRW_STATE_WM_SURFACES)
-#define BRW_NEW_BINDING_TABLE (1 << BRW_STATE_BINDING_TABLE)
+#define BRW_NEW_VS_BINDING_TABLE (1 << BRW_STATE_VS_BINDING_TABLE)
+#define BRW_NEW_GS_BINDING_TABLE (1 << BRW_STATE_GS_BINDING_TABLE)
+#define BRW_NEW_PS_BINDING_TABLE (1 << BRW_STATE_PS_BINDING_TABLE)
#define BRW_NEW_INDICES (1 << BRW_STATE_INDICES)
#define BRW_NEW_VERTICES (1 << BRW_STATE_VERTICES)
/**
* Used for any batch entry with a relocated pointer that will be used
* by any 3D rendering.
*/
-#define BRW_NEW_BATCH (1 << BRW_STATE_BATCH)
+#define BRW_NEW_BATCH (1 << BRW_STATE_BATCH)
-/** \see brw.state.depth_region */
-#define BRW_NEW_DEPTH_BUFFER (1 << BRW_STATE_DEPTH_BUFFER)
-#define BRW_NEW_NR_WM_SURFACES (1 << BRW_STATE_NR_WM_SURFACES)
-#define BRW_NEW_NR_VS_SURFACES (1 << BRW_STATE_NR_VS_SURFACES)
-#define BRW_NEW_INDEX_BUFFER (1 << BRW_STATE_INDEX_BUFFER)
-#define BRW_NEW_VS_CONSTBUF (1 << BRW_STATE_VS_CONSTBUF)
-#define BRW_NEW_WM_CONSTBUF (1 << BRW_STATE_WM_CONSTBUF)
+#define BRW_NEW_NR_WM_SURFACES (1 << BRW_STATE_NR_WM_SURFACES)
+#define BRW_NEW_NR_VS_SURFACES (1 << BRW_STATE_NR_VS_SURFACES)
+#define BRW_NEW_INDEX_BUFFER (1 << BRW_STATE_INDEX_BUFFER)
+#define BRW_NEW_VS_CONSTBUF (1 << BRW_STATE_VS_CONSTBUF)
+#define BRW_NEW_WM_CONSTBUF (1 << BRW_STATE_WM_CONSTBUF)
+#define BRW_NEW_PROGRAM_CACHE (1 << BRW_STATE_PROGRAM_CACHE)
+#define BRW_NEW_STATE_BASE_ADDRESS (1 << BRW_STATE_STATE_BASE_ADDRESS)
struct brw_state_flags {
/** State update flags signalled by brw_state_cache.c searches */
GLuint cache;
};
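/* Illustrative sketch, not part of this patch: a tracked-state atom ORs the
 * BRW_NEW_* bits above into its dirty mask so that brw_validate_state()
 * re-runs it whenever one of those flags is raised.  The atom and emit
 * function names below are made up, and this assumes brw_state_flags also
 * carries the usual .mesa and .brw members alongside .cache.
 */
static void example_emit_ps_binding_table(struct brw_context *brw);

static const struct brw_tracked_state example_ps_binding_table_atom = {
   .dirty = {
      .mesa = 0,
      .brw = BRW_NEW_BATCH | BRW_NEW_PS_BINDING_TABLE,
      .cache = 0,
   },
   .emit = example_emit_ps_binding_table,
};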
+enum state_struct_type {
+ AUB_TRACE_VS_STATE = 1,
+ AUB_TRACE_GS_STATE = 2,
+ AUB_TRACE_CLIP_STATE = 3,
+ AUB_TRACE_SF_STATE = 4,
+ AUB_TRACE_WM_STATE = 5,
+ AUB_TRACE_CC_STATE = 6,
+ AUB_TRACE_CLIP_VP_STATE = 7,
+ AUB_TRACE_SF_VP_STATE = 8,
+ AUB_TRACE_CC_VP_STATE = 0x9,
+ AUB_TRACE_SAMPLER_STATE = 0xa,
+ AUB_TRACE_KERNEL_INSTRUCTIONS = 0xb,
+ AUB_TRACE_SCRATCH_SPACE = 0xc,
+ AUB_TRACE_SAMPLER_DEFAULT_COLOR = 0xd,
+
+ AUB_TRACE_SCISSOR_STATE = 0x15,
+ AUB_TRACE_BLEND_STATE = 0x16,
+ AUB_TRACE_DEPTH_STENCIL_STATE = 0x17,
+
+ /* Not written to .aub files the same way the structures above are. */
+ AUB_TRACE_NO_TYPE = 0x100,
+ AUB_TRACE_BINDING_TABLE = 0x101,
+ AUB_TRACE_SURFACE_STATE = 0x102,
+ AUB_TRACE_VS_CONSTANTS = 0x103,
+ AUB_TRACE_WM_CONSTANTS = 0x104,
+};
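/* Illustrative sketch, not part of this patch: under INTEL_DEBUG=batch, each
 * chunk of state allocated out of the batchbuffer can be recorded in the
 * state_batch_list declared further down, tagged with one of the types above
 * so the batch decoder / .aub writer knows how to interpret it.  The helper
 * name is made up, list allocation/growth is omitted, and this assumes
 * state_batch_list and state_batch_count hang directly off brw_context.
 */
static void
example_track_state_batch(struct brw_context *brw,
                          enum state_struct_type type,
                          uint32_t offset, uint32_t size)
{
   int i = brw->state_batch_count++;

   brw->state_batch_list[i].offset = offset;
   brw->state_batch_list[i].size = size;
   brw->state_batch_list[i].type = type;
}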
/** Subclass of Mesa vertex program */
struct brw_vertex_program {
GLuint first_curbe_grf;
GLuint first_curbe_grf_16;
- GLuint total_grf;
- GLuint total_grf_16;
+ GLuint reg_blocks;
+ GLuint reg_blocks_16;
GLuint total_scratch;
GLuint nr_params; /**< number of float params/constants */
/** 32-bit hash of the key data */
GLuint hash;
GLuint key_size; /* for variable-sized keys */
+ GLuint aux_size;
const void *key;
- drm_intel_bo *bo;
+ uint32_t offset;
+ uint32_t size;
struct brw_cache_item *next;
};
struct brw_context *brw;
struct brw_cache_item **items;
+ drm_intel_bo *bo;
GLuint size, n_items;
- char *name[BRW_MAX_CACHE];
-
- /* Record of the last BOs chosen for each cache_id. Used to set
- * brw->state.dirty.cache when a new cache item is chosen.
- */
- drm_intel_bo *last_bo[BRW_MAX_CACHE];
+ uint32_t next_offset;
+ bool bo_used_by_gpu;
};
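/* Illustrative sketch, not part of this patch: with the cache backed by a
 * single BO, uploading a compiled program means appending its instructions at
 * cache->next_offset and storing that offset (plus size) in the cache item,
 * instead of allocating a per-program drm_intel_bo.  Growing the BO when it
 * runs out of space, and bo_used_by_gpu handling, are omitted here.
 */
static uint32_t
example_append_program(struct brw_cache *cache, const void *insns,
                       uint32_t size)
{
   uint32_t offset = cache->next_offset;

   /* Keep kernels 64-byte aligned for KERNEL_START_POINTER fields. */
   cache->next_offset = ALIGN(offset + size, 64);

   drm_intel_bo_subdata(cache->bo, offset, size, insns);

   return offset;
}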
struct {
struct brw_state_flags dirty;
-
- /**
- * \name Cached region pointers
- *
- * When the draw buffer is updated, often the depth buffer is not
- * changed. Caching the pointer to the buffer's region allows us to
- * detect when the buffer has in fact changed, and allows us to avoid
- * updating the buffer's GPU state when it has not.
- *
- * The original of each cached pointer is an instance of
- * \c intel_renderbuffer.region.
- *
- * \see brw_set_draw_region()
- *
- * \{
- */
-
- /** \see struct brw_tracked_state brw_depthbuffer */
- struct intel_region *depth_region;
-
- /** \} */
-
/**
* List of buffers accumulated in brw_validate_state to receive
 * drm_intel_bo_check_aperture treatment before exec, so we can
 * know whether to flush the batch and try again before emitting
 * primitives.  The array can stay fixed-size because a primitive emit
 * only references a bounded set of objects: the vertex buffers,
 * pipelined state pointers, the CURBE, the depth buffer, and a query BO.
*/
drm_intel_bo *validated_bos[VERT_ATTRIB_MAX + BRW_WM_MAX_SURF + 16];
- int validated_bo_count;
+ unsigned int validated_bo_count;
} state;
struct brw_cache cache;
struct brw_vs_prog_data *prog_data;
int8_t *constant_map; /* variable array following prog_data */
- drm_intel_bo *prog_bo;
drm_intel_bo *const_bo;
+ /** Offset in the program cache to the VS program */
+ uint32_t prog_offset;
uint32_t state_offset;
/** Binding table of pointers to surf_bo entries */
struct brw_gs_prog_data *prog_data;
GLboolean prog_active;
+ /** Offset in the program cache to the GS program pre-gen6 */
+ uint32_t prog_offset;
uint32_t state_offset;
- drm_intel_bo *prog_bo;
} gs;
struct {
struct brw_clip_prog_data *prog_data;
- drm_intel_bo *prog_bo;
+ /** Offset in the program cache to the CLIP program pre-gen6 */
+ uint32_t prog_offset;
/* Offset in the batch to the CLIP state on pre-gen6. */
uint32_t state_offset;
struct {
struct brw_sf_prog_data *prog_data;
- drm_intel_bo *prog_bo;
+ /** Offset in the program cache to the SF program pre-gen6 */
+ uint32_t prog_offset;
uint32_t state_offset;
uint32_t vp_offset;
} sf;
GLuint sampler_count;
uint32_t sampler_offset;
+ /** Offset in the program cache to the WM program */
+ uint32_t prog_offset;
+
/** Binding table of pointers to surf_bo entries */
uint32_t bind_bo_offset;
uint32_t surf_offset[BRW_WM_MAX_SURF];
uint32_t state_offset; /* offset in batchbuffer to pre-gen6 WM state */
- drm_intel_bo *prog_bo;
drm_intel_bo *const_bo; /* pull constant buffer. */
/**
* This is offset in the batch to the push constants on gen6.
* Pre-gen6, push constants live in the CURBE.
*/
uint32_t push_const_offset;
+
+ /** @{ register allocator */
+
+ struct ra_regs *regs;
+
+ /** Array of the ra classes for the unaligned contiguous
+ * register block sizes used.
+ */
+ int *classes;
+
+ /**
+ * Mapping for register-allocated objects in *regs to the first
+ * GRF for that object.
+ */
+ uint8_t *ra_reg_to_grf;
+
+ /**
+ * ra class for the aligned pairs we use for PLN, which doesn't
+ * appear in *classes.
+ */
+ int aligned_pairs_class;
+
+ /** @} */
} wm;
struct {
- /* gen4 */
- drm_intel_bo *prog_bo;
-
uint32_t state_offset;
uint32_t blend_state_offset;
uint32_t depth_stencil_state_offset;
int num_prepare_atoms, num_emit_atoms;
struct brw_tracked_state prepare_atoms[64], emit_atoms[64];
+
+ /* If (INTEL_DEBUG & DEBUG_BATCH) */
+ struct {
+ uint32_t offset;
+ uint32_t size;
+ enum state_struct_type type;
+ } *state_batch_list;
+ int state_batch_count;
};
}
}
+/**
+ * Pre-gen6, the register file of the EUs was shared between threads,
+ * and each thread used some subset allocated on a 16-register block
+ * granularity. The unit states wanted these block counts.
+ */
+static inline int
+brw_register_blocks(int reg_count)
+{
+ return ALIGN(reg_count, 16) / 16 - 1;
+}
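/* Worked example (illustrative): a kernel using 24 GRFs spans two 16-register
 * blocks, which the pre-gen6 unit state encodes as blocks - 1:
 *
 *   brw_register_blocks(24) == ALIGN(24, 16) / 16 - 1 == 32 / 16 - 1 == 1
 */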
+
+static inline uint32_t
+brw_program_reloc(struct brw_context *brw, uint32_t state_offset,
+ uint32_t prog_offset)
+{
+ struct intel_context *intel = &brw->intel;
+
+ if (intel->gen >= 5) {
+ /* Using state base address. */
+ return prog_offset;
+ }
+
+ drm_intel_bo_emit_reloc(intel->batch.bo,
+ state_offset,
+ brw->cache.bo,
+ prog_offset,
+ I915_GEM_DOMAIN_INSTRUCTION, 0);
+
+ return brw->cache.bo->offset + prog_offset;
+}
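/* Usage sketch, not part of this patch: how a pre-gen6 unit-state emitter
 * might consume the two helpers above.  The unit-state field names
 * (thread0.grf_reg_count, kernel_start_pointer) follow the pre-gen6 hardware
 * structs but should be treated as assumptions here; prog_data->reg_blocks is
 * the value the compiler stored via brw_register_blocks().
 */
static void
example_fill_wm_thread0(struct brw_context *brw,
                        struct brw_wm_unit_state *wm,
                        uint32_t state_offset)
{
   wm->thread0.grf_reg_count = brw->wm.prog_data->reg_blocks;

   /* kernel_start_pointer is in 64-byte units; the reloc delta includes the
    * low grf_reg_count bits because the relocation rewrites the whole dword.
    */
   wm->thread0.kernel_start_pointer =
      brw_program_reloc(brw,
                        state_offset +
                        offsetof(struct brw_wm_unit_state, thread0),
                        brw->wm.prog_offset +
                        (wm->thread0.grf_reg_count << 1)) >> 6;
}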
+
GLboolean brw_do_cubemap_normalize(struct exec_list *instructions);
#endif