};
-/**
- * Bitmask indicating which fragment shader inputs represent varyings (and
- * hence have to be delivered to the fragment shader by the SF/SBE stage).
- */
-#define BRW_FS_VARYING_INPUT_MASK \
- (BITFIELD64_RANGE(0, VARYING_SLOT_MAX) & \
- ~VARYING_BIT_POS & ~VARYING_BIT_FACE)
-
-
struct brw_sf_prog_data {
GLuint urb_read_length;
GLuint total_grf;
};
-/**
- * We always program SF to start reading at an offset of 1 (2 varying slots)
- * from the start of the vertex URB entry. This causes it to skip:
- * - VARYING_SLOT_PSIZ and BRW_VARYING_SLOT_NDC on gen4-5
- * - VARYING_SLOT_PSIZ and VARYING_SLOT_POS on gen6+
- */
-#define BRW_SF_URB_ENTRY_READ_OFFSET 1
-
-
struct brw_clip_prog_data {
GLuint curb_read_length; /* user planes? */
GLuint clip_mode;
/** Number of texture sampler units */
#define BRW_MAX_TEX_UNIT 32
-/** Max number of render targets in a shader */
-#define BRW_MAX_DRAW_BUFFERS 8
-
/** Max number of UBOs in a shader */
#define BRW_MAX_UBO 14
/** Max number of image uniforms in a shader */
#define BRW_MAX_IMAGES 32
-/**
- * Max number of binding table entries used for stream output.
- *
- * From the OpenGL 3.0 spec, table 6.44 (Transform Feedback State), the
- * minimum value of MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS is 64.
- *
- * On Gen6, the size of transform feedback data is limited not by the number
- * of components but by the number of binding table entries we set aside. We
- * use one binding table entry for a float, one entry for a vector, and one
- * entry per matrix column. Since the only way we can communicate our
- * transform feedback capabilities to the client is via
- * MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS, we need to plan for the
- * worst case, in which all the varyings are floats, so we use up one binding
- * table entry per component. Therefore we need to set aside at least 64
- * binding table entries for use by transform feedback.
- *
- * Note: since we don't currently pack varyings, it is currently impossible
- * for the client to actually use up all of these binding table entries--if
- * all of their varyings were floats, they would run out of varying slots and
- * fail to link. But that's a bug, so it seems prudent to go ahead and
- * allocate the number of binding table entries we will need once the bug is
- * fixed.
- */
-#define BRW_MAX_SOL_BINDINGS 64
-
/** Maximum number of actual buffers used for stream output */
#define BRW_MAX_SOL_BUFFERS 4
2 + /* shader time, pull constants */ \
1 /* cs num work groups */)
-#define SURF_INDEX_GEN6_SOL_BINDING(t) (t)
-
-/**
- * Stride in bytes between shader_time entries.
- *
- * We separate entries by a cacheline to reduce traffic between EUs writing to
- * different entries.
- */
-#define SHADER_TIME_STRIDE 64
-
struct brw_cache {
struct brw_context *brw;
} saved;
};
-#define MAX_GS_INPUT_VERTICES 6
-
#define BRW_MAX_XFB_STREAMS 4
struct brw_transform_feedback_object {
struct gen_l3_config;
+enum brw_query_kind { /* tag type of brw_perf_query_info's 'kind' field */
+ PIPELINE_STATS /* the only kind declared in this enum */
+};
+
+struct brw_perf_query_info /* record describing one perf query */
+{
+ enum brw_query_kind kind; /* which kind of query this record is */
+ const char *name; /* query name; not written through this pointer */
+ struct brw_perf_query_counter *counters; /* presumably n_counters entries -- TODO confirm */
+ int n_counters; /* presumably the length of 'counters' -- TODO confirm */
+ size_t data_size; /* presumably result data size in bytes -- TODO confirm */
+};
+
/**
* brw_context is derived from gl_context.
*/
bool supported;
} predicate;
+ struct { /* perf-query bookkeeping kept on the context */
+ struct brw_perf_query_info *queries; /* presumably n_queries entries -- TODO confirm */
+ int n_queries; /* presumably the length of 'queries' -- TODO confirm */
+
+ int n_active_pipeline_stats_queries; /* presumably counts active PIPELINE_STATS queries -- TODO confirm */
+ } perfquery;
+
int num_atoms[BRW_NUM_PIPELINES];
const struct brw_tracked_state render_atoms[76];
const struct brw_tracked_state compute_atoms[11];
void brw_write_depth_count(struct brw_context *brw, drm_intel_bo *bo, int idx);
/** hsw_queryobj.c */
+void hsw_overflow_result_to_gpr0(struct brw_context *brw,
+ struct brw_query_object *query,
+ int count);
void hsw_init_queryobj_functions(struct dd_function_table *functions);
/** brw_conditional_render.c */
void brwInitFragProgFuncs( struct dd_function_table *functions );
-/* Per-thread scratch space is a power-of-two multiple of 1KB. */
-static inline int
-brw_get_scratch_size(int size)
-{
- return MAX2(1024, util_next_power_of_two(size));
-}
void brw_get_scratch_bo(struct brw_context *brw,
drm_intel_bo **scratch_bo, int size);
void brw_alloc_stage_scratch(struct brw_context *brw,
*/
void brw_upload_cs_urb_state(struct brw_context *brw);
-/* brw_fs_reg_allocate.cpp
- */
-void brw_fs_alloc_reg_sets(struct brw_compiler *compiler);
-
-/* brw_vec4_reg_allocate.cpp */
-void brw_vec4_alloc_reg_set(struct brw_compiler *compiler);
-
-/* brw_disasm.c */
-int brw_disassemble_inst(FILE *file, const struct gen_device_info *devinfo,
- struct brw_inst *inst, bool is_compacted);
-
/* brw_vs.c */
gl_clip_plane *brw_select_clip_planes(struct gl_context *ctx);
struct gl_renderbuffer *rb);
uint32_t brw_depth_format(struct brw_context *brw, mesa_format format);
+/* brw_performance_query.c */
+void brw_init_performance_queries(struct brw_context *brw);
+
/* intel_buffer_objects.c */
int brw_bo_map(struct brw_context *brw, drm_intel_bo *bo, int write_enable,
const char *bo_name);
return (const struct brw_program *) p;
}
-/**
- * Pre-gen6, the register file of the EUs was shared between threads,
- * and each thread used some subset allocated on a 16-register block
- * granularity. The unit states wanted these block counts.
- */
-static inline int
-brw_register_blocks(int reg_count)
-{
- return ALIGN(reg_count, 16) / 16 - 1;
-}
-
static inline uint32_t
brw_program_reloc(struct brw_context *brw, uint32_t state_offset,
uint32_t prog_offset)
return brw->cache.bo->offset64 + prog_offset;
}
-bool brw_do_cubemap_normalize(struct exec_list *instructions);
-
-extern const char * const conditional_modifier[16];
-extern const char *const pred_ctrl_align16[16];
-
static inline bool
brw_depth_writes_enabled(const struct brw_context *brw)
{