#define BRWCONTEXT_INC
#include <stdbool.h>
-#include <string.h>
-#include "main/imports.h"
#include "main/macros.h"
-#include "main/mm.h"
#include "main/mtypes.h"
#include "brw_structs.h"
+#include "brw_compiler.h"
#include "intel_aub.h"
-#include "program/prog_parameter.h"
#ifdef __cplusplus
extern "C" {
#define virtual virt
#endif
-#include <drm.h>
#include <intel_bufmgr.h>
-#include <i915_drm.h>
#ifdef __cplusplus
#undef virtual
}
* enabled, it first passes them to a VS thread which is a good place
* for the driver to implement any active vertex shader.
*
+ * HS - Hull Shader (Tessellation Control Shader)
+ *
+ * TE - Tessellation Engine (Tessellation Primitive Generation)
+ *
+ * DS - Domain Shader (Tessellation Evaluation Shader)
+ *
* GS - Geometry Shader. This corresponds to a new DX10 concept. If
* enabled, incoming strips etc are passed to GS threads in individual
* line/triangle/point units. The GS thread may perform arbitary
struct brw_vue_prog_key;
struct brw_wm_prog_key;
struct brw_wm_prog_data;
+struct brw_cs_prog_key;
+struct brw_cs_prog_data;
enum brw_pipeline {
BRW_RENDER_PIPELINE,
BRW_CACHE_VS_PROG,
BRW_CACHE_FF_GS_PROG,
BRW_CACHE_GS_PROG,
+ BRW_CACHE_TCS_PROG,
+ BRW_CACHE_TES_PROG,
BRW_CACHE_CLIP_PROG,
+ BRW_CACHE_CS_PROG,
BRW_MAX_CACHE
};
BRW_STATE_URB_FENCE = BRW_MAX_CACHE,
BRW_STATE_FRAGMENT_PROGRAM,
BRW_STATE_GEOMETRY_PROGRAM,
+ BRW_STATE_TESS_CTRL_PROGRAM,
+ BRW_STATE_TESS_EVAL_PROGRAM,
BRW_STATE_VERTEX_PROGRAM,
BRW_STATE_CURBE_OFFSETS,
BRW_STATE_REDUCED_PRIMITIVE,
+ BRW_STATE_PATCH_PRIMITIVE,
BRW_STATE_PRIMITIVE,
BRW_STATE_CONTEXT,
BRW_STATE_PSP,
BRW_STATE_SURFACES,
- BRW_STATE_VS_BINDING_TABLE,
- BRW_STATE_GS_BINDING_TABLE,
- BRW_STATE_PS_BINDING_TABLE,
+ BRW_STATE_BINDING_TABLE_POINTERS,
BRW_STATE_INDICES,
BRW_STATE_VERTICES,
BRW_STATE_BATCH,
BRW_STATE_INDEX_BUFFER,
BRW_STATE_VS_CONSTBUF,
+ BRW_STATE_TCS_CONSTBUF,
+ BRW_STATE_TES_CONSTBUF,
BRW_STATE_GS_CONSTBUF,
BRW_STATE_PROGRAM_CACHE,
BRW_STATE_STATE_BASE_ADDRESS,
- BRW_STATE_VUE_MAP_VS,
BRW_STATE_VUE_MAP_GEOM_OUT,
BRW_STATE_TRANSFORM_FEEDBACK,
BRW_STATE_RASTERIZER_DISCARD,
BRW_STATE_STATS_WM,
BRW_STATE_UNIFORM_BUFFER,
BRW_STATE_ATOMIC_BUFFER,
+ BRW_STATE_IMAGE_UNITS,
BRW_STATE_META_IN_PROGRESS,
BRW_STATE_INTERPOLATION_MAP,
BRW_STATE_PUSH_CONSTANT_ALLOCATION,
BRW_STATE_CLIP_VP,
BRW_STATE_SAMPLER_STATE_TABLE,
BRW_STATE_VS_ATTRIB_WORKAROUNDS,
+ BRW_STATE_COMPUTE_PROGRAM,
+ BRW_STATE_CS_WORK_GROUPS,
+ BRW_STATE_URB_SIZE,
BRW_NUM_STATE_BITS
};
#define BRW_NEW_VS_PROG_DATA (1ull << BRW_CACHE_VS_PROG)
#define BRW_NEW_FF_GS_PROG_DATA (1ull << BRW_CACHE_FF_GS_PROG)
#define BRW_NEW_GS_PROG_DATA (1ull << BRW_CACHE_GS_PROG)
+#define BRW_NEW_TCS_PROG_DATA (1ull << BRW_CACHE_TCS_PROG)
+#define BRW_NEW_TES_PROG_DATA (1ull << BRW_CACHE_TES_PROG)
#define BRW_NEW_CLIP_PROG_DATA (1ull << BRW_CACHE_CLIP_PROG)
+#define BRW_NEW_CS_PROG_DATA (1ull << BRW_CACHE_CS_PROG)
#define BRW_NEW_URB_FENCE (1ull << BRW_STATE_URB_FENCE)
#define BRW_NEW_FRAGMENT_PROGRAM (1ull << BRW_STATE_FRAGMENT_PROGRAM)
#define BRW_NEW_GEOMETRY_PROGRAM (1ull << BRW_STATE_GEOMETRY_PROGRAM)
+#define BRW_NEW_TESS_EVAL_PROGRAM (1ull << BRW_STATE_TESS_EVAL_PROGRAM)
+#define BRW_NEW_TESS_CTRL_PROGRAM (1ull << BRW_STATE_TESS_CTRL_PROGRAM)
#define BRW_NEW_VERTEX_PROGRAM (1ull << BRW_STATE_VERTEX_PROGRAM)
#define BRW_NEW_CURBE_OFFSETS (1ull << BRW_STATE_CURBE_OFFSETS)
#define BRW_NEW_REDUCED_PRIMITIVE (1ull << BRW_STATE_REDUCED_PRIMITIVE)
+#define BRW_NEW_PATCH_PRIMITIVE (1ull << BRW_STATE_PATCH_PRIMITIVE)
#define BRW_NEW_PRIMITIVE (1ull << BRW_STATE_PRIMITIVE)
#define BRW_NEW_CONTEXT (1ull << BRW_STATE_CONTEXT)
#define BRW_NEW_PSP (1ull << BRW_STATE_PSP)
#define BRW_NEW_SURFACES (1ull << BRW_STATE_SURFACES)
-#define BRW_NEW_VS_BINDING_TABLE (1ull << BRW_STATE_VS_BINDING_TABLE)
-#define BRW_NEW_GS_BINDING_TABLE (1ull << BRW_STATE_GS_BINDING_TABLE)
-#define BRW_NEW_PS_BINDING_TABLE (1ull << BRW_STATE_PS_BINDING_TABLE)
+#define BRW_NEW_BINDING_TABLE_POINTERS (1ull << BRW_STATE_BINDING_TABLE_POINTERS)
#define BRW_NEW_INDICES (1ull << BRW_STATE_INDICES)
#define BRW_NEW_VERTICES (1ull << BRW_STATE_VERTICES)
/**
/** \see brw.state.depth_region */
#define BRW_NEW_INDEX_BUFFER (1ull << BRW_STATE_INDEX_BUFFER)
#define BRW_NEW_VS_CONSTBUF (1ull << BRW_STATE_VS_CONSTBUF)
+#define BRW_NEW_TCS_CONSTBUF (1ull << BRW_STATE_TCS_CONSTBUF)
+#define BRW_NEW_TES_CONSTBUF (1ull << BRW_STATE_TES_CONSTBUF)
#define BRW_NEW_GS_CONSTBUF (1ull << BRW_STATE_GS_CONSTBUF)
#define BRW_NEW_PROGRAM_CACHE (1ull << BRW_STATE_PROGRAM_CACHE)
#define BRW_NEW_STATE_BASE_ADDRESS (1ull << BRW_STATE_STATE_BASE_ADDRESS)
-#define BRW_NEW_VUE_MAP_VS (1ull << BRW_STATE_VUE_MAP_VS)
#define BRW_NEW_VUE_MAP_GEOM_OUT (1ull << BRW_STATE_VUE_MAP_GEOM_OUT)
#define BRW_NEW_TRANSFORM_FEEDBACK (1ull << BRW_STATE_TRANSFORM_FEEDBACK)
#define BRW_NEW_RASTERIZER_DISCARD (1ull << BRW_STATE_RASTERIZER_DISCARD)
#define BRW_NEW_STATS_WM (1ull << BRW_STATE_STATS_WM)
#define BRW_NEW_UNIFORM_BUFFER (1ull << BRW_STATE_UNIFORM_BUFFER)
#define BRW_NEW_ATOMIC_BUFFER (1ull << BRW_STATE_ATOMIC_BUFFER)
+#define BRW_NEW_IMAGE_UNITS (1ull << BRW_STATE_IMAGE_UNITS)
#define BRW_NEW_META_IN_PROGRESS (1ull << BRW_STATE_META_IN_PROGRESS)
#define BRW_NEW_INTERPOLATION_MAP (1ull << BRW_STATE_INTERPOLATION_MAP)
#define BRW_NEW_PUSH_CONSTANT_ALLOCATION (1ull << BRW_STATE_PUSH_CONSTANT_ALLOCATION)
#define BRW_NEW_CLIP_VP (1ull << BRW_STATE_CLIP_VP)
#define BRW_NEW_SAMPLER_STATE_TABLE (1ull << BRW_STATE_SAMPLER_STATE_TABLE)
#define BRW_NEW_VS_ATTRIB_WORKAROUNDS (1ull << BRW_STATE_VS_ATTRIB_WORKAROUNDS)
+#define BRW_NEW_COMPUTE_PROGRAM (1ull << BRW_STATE_COMPUTE_PROGRAM)
+#define BRW_NEW_CS_WORK_GROUPS (1ull << BRW_STATE_CS_WORK_GROUPS)
+#define BRW_NEW_URB_SIZE (1ull << BRW_STATE_URB_SIZE)
struct brw_state_flags {
/** State update flags signalled by mesa internals */
};
+/** Subclass of Mesa tessellation control program.
+ *
+ * Wraps the core Mesa object and adds a driver-side serial number.
+ * NOTE(review): the Mesa base object is the first member, presumably so that
+ * a gl_tess_ctrl_program pointer can be cast to this type in the same way
+ * brw_compute_program() casts compute programs -- confirm before reordering.
+ */
+struct brw_tess_ctrl_program {
+ struct gl_tess_ctrl_program program;
+ unsigned id; /**< serial no. to identify tess ctrl progs, never re-used */
+};
+
+
+/** Subclass of Mesa tessellation evaluation program.
+ *
+ * Wraps the core Mesa object and adds a driver-side serial number.
+ * NOTE(review): the Mesa base object is the first member, presumably so that
+ * a gl_tess_eval_program pointer can be cast to this type in the same way
+ * brw_compute_program() casts compute programs -- confirm before reordering.
+ */
+struct brw_tess_eval_program {
+ struct gl_tess_eval_program program;
+ unsigned id; /**< serial no. to identify tess eval progs, never re-used */
+};
+
+
/** Subclass of Mesa geometry program */
struct brw_geometry_program {
struct gl_geometry_program program;
bool compiled_once;
};
-/* Note: If adding fields that need anything besides a normal memcmp() for
- * comparing them, be sure to go fix brw_stage_prog_data_compare().
- */
-struct brw_stage_prog_data {
- struct {
- /** size of our binding table. */
- uint32_t size_bytes;
-
- /** @{
- * surface indices for the various groups of surfaces
- */
- uint32_t pull_constants_start;
- uint32_t texture_start;
- uint32_t gather_texture_start;
- uint32_t ubo_start;
- uint32_t abo_start;
- uint32_t image_start;
- uint32_t shader_time_start;
- /** @} */
- } binding_table;
-
- GLuint nr_params; /**< number of float params/constants */
- GLuint nr_pull_params;
-
- unsigned curb_read_length;
- unsigned total_scratch;
-
- /**
- * Register where the thread expects to find input data from the URB
- * (typically uniforms, followed by vertex or fragment attributes).
- */
- unsigned dispatch_grf_start_reg;
-
- bool use_alt_mode; /**< Use ALT floating point mode? Otherwise, IEEE. */
-
- /* Pointers to tracked values (only valid once
- * _mesa_load_state_parameters has been called at runtime).
- *
- * These must be the last fields of the struct (see
- * brw_stage_prog_data_compare()).
- */
- const gl_constant_value **param;
- const gl_constant_value **pull_param;
-};
-
-/* Data about a particular attempt to compile a program. Note that
- * there can be many of these, each in a different GL state
- * corresponding to a different brw_wm_prog_key struct, with different
- * compiled programs.
- *
- * Note: brw_wm_prog_data_compare() must be updated when adding fields to this
- * struct!
- */
-struct brw_wm_prog_data {
- struct brw_stage_prog_data base;
-
- GLuint num_varying_inputs;
-
- GLuint dispatch_grf_start_reg_16;
- GLuint reg_blocks;
- GLuint reg_blocks_16;
-
- struct {
- /** @{
- * surface indices the WM-specific surfaces
- */
- uint32_t render_target_start;
- /** @} */
- } binding_table;
-
- uint8_t computed_depth_mode;
-
- bool no_8;
- bool dual_src_blend;
- bool uses_pos_offset;
- bool uses_omask;
- bool uses_kill;
- uint32_t prog_offset_16;
-
- /**
- * Mask of which interpolation modes are required by the fragment shader.
- * Used in hardware setup on gen6+.
- */
- uint32_t barycentric_interp_modes;
-
- /**
- * Map from gl_varying_slot to the position within the FS setup data
- * payload where the varying's attribute vertex deltas should be delivered.
- * For varying slots that are not used by the FS, the value is -1.
- */
- int urb_setup[VARYING_SLOT_MAX];
-};
-
-/**
- * Enum representing the i965-specific vertex results that don't correspond
- * exactly to any element of gl_varying_slot. The values of this enum are
- * assigned such that they don't conflict with gl_varying_slot.
- */
-typedef enum
-{
- BRW_VARYING_SLOT_NDC = VARYING_SLOT_MAX,
- BRW_VARYING_SLOT_PAD,
- /**
- * Technically this is not a varying but just a placeholder that
- * compile_sf_prog() inserts into its VUE map to cause the gl_PointCoord
- * builtin variable to be compiled correctly. see compile_sf_prog() for
- * more info.
- */
- BRW_VARYING_SLOT_PNTC,
- BRW_VARYING_SLOT_COUNT
-} brw_varying_slot;
-
-
-/**
- * Data structure recording the relationship between the gl_varying_slot enum
- * and "slots" within the vertex URB entry (VUE). A "slot" is defined as a
- * single octaword within the VUE (128 bits).
- *
- * Note that each BRW register contains 256 bits (2 octawords), so when
- * accessing the VUE in URB_NOSWIZZLE mode, each register corresponds to two
- * consecutive VUE slots. When accessing the VUE in URB_INTERLEAVED mode (as
- * in a vertex shader), each register corresponds to a single VUE slot, since
- * it contains data for two separate vertices.
- */
-struct brw_vue_map {
- /**
- * Bitfield representing all varying slots that are (a) stored in this VUE
- * map, and (b) actually written by the shader. Does not include any of
- * the additional varying slots defined in brw_varying_slot.
- */
- GLbitfield64 slots_valid;
-
- /**
- * Map from gl_varying_slot value to VUE slot. For gl_varying_slots that are
- * not stored in a slot (because they are not written, or because
- * additional processing is applied before storing them in the VUE), the
- * value is -1.
- */
- signed char varying_to_slot[BRW_VARYING_SLOT_COUNT];
-
- /**
- * Map from VUE slot to gl_varying_slot value. For slots that do not
- * directly correspond to a gl_varying_slot, the value comes from
- * brw_varying_slot.
- *
- * For slots that are not in use, the value is BRW_VARYING_SLOT_COUNT (this
- * simplifies code that uses the value stored in slot_to_varying to
- * create a bit mask).
- */
- signed char slot_to_varying[BRW_VARYING_SLOT_COUNT];
-
- /**
- * Total number of VUE slots in use
- */
- int num_slots;
-};
-
-/**
- * Convert a VUE slot number into a byte offset within the VUE.
- */
-static inline GLuint brw_vue_slot_to_offset(GLuint slot)
-{
- return 16*slot;
-}
-
-/**
- * Convert a vertex output (brw_varying_slot) into a byte offset within the
- * VUE.
- */
-static inline GLuint brw_varying_to_offset(struct brw_vue_map *vue_map,
- GLuint varying)
-{
- return brw_vue_slot_to_offset(vue_map->varying_to_slot[varying]);
-}
-
-void brw_compute_vue_map(struct brw_context *brw, struct brw_vue_map *vue_map,
- GLbitfield64 slots_valid);
-
-
/**
* Bitmask indicating which fragment shader inputs represent varyings (and
* hence have to be delivered to the fragment shader by the SF/SBE stage).
unsigned svbi_postincrement_value;
};
-
-/* Note: brw_vue_prog_data_compare() must be updated when adding fields to
- * this struct!
- */
-struct brw_vue_prog_data {
- struct brw_stage_prog_data base;
- struct brw_vue_map vue_map;
-
- GLuint urb_read_length;
- GLuint total_grf;
-
- /* Used for calculating urb partitions. In the VS, this is the size of the
- * URB entry used for both input and output to the thread. In the GS, this
- * is the size of the URB entry used for output.
- */
- GLuint urb_entry_size;
-
- bool simd8;
-};
-
-
-/* Note: brw_vs_prog_data_compare() must be updated when adding fields to this
- * struct!
- */
-struct brw_vs_prog_data {
- struct brw_vue_prog_data base;
-
- GLbitfield64 inputs_read;
-
- bool uses_vertexid;
- bool uses_instanceid;
-};
-
/** Number of texture sampler units */
#define BRW_MAX_TEX_UNIT 32
/** Max number of render targets in a shader */
#define BRW_MAX_DRAW_BUFFERS 8
+/** Max number of UBOs in a shader */
+#define BRW_MAX_UBO 14
+
+/** Max number of SSBOs in a shader */
+#define BRW_MAX_SSBO 12
+
/** Max number of atomic counter buffer objects in a shader */
#define BRW_MAX_ABO 16
#define BRW_MAX_SURFACES (BRW_MAX_DRAW_BUFFERS + \
BRW_MAX_TEX_UNIT * 2 + /* normal, gather */ \
- 12 + /* ubo */ \
+ BRW_MAX_UBO + \
+ BRW_MAX_SSBO + \
BRW_MAX_ABO + \
BRW_MAX_IMAGES + \
- 2 /* shader time, pull constants */)
+ 2 + /* shader time, pull constants */ \
+ 1 /* cs num work groups */)
#define SURF_INDEX_GEN6_SOL_BINDING(t) (t)
-/* Note: brw_gs_prog_data_compare() must be updated when adding fields to
- * this struct!
- */
-struct brw_gs_prog_data
-{
- struct brw_vue_prog_data base;
-
- /**
- * Size of an output vertex, measured in HWORDS (32 bytes).
- */
- unsigned output_vertex_size_hwords;
-
- unsigned output_topology;
-
- /**
- * Size of the control data (cut bits or StreamID bits), in hwords (32
- * bytes). 0 if there is no control data.
- */
- unsigned control_data_header_size_hwords;
-
- /**
- * Format of the control data (either GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID
- * if the control data is StreamID bits, or
- * GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_CUT if the control data is cut bits).
- * Ignored if control_data_header_size is 0.
- */
- unsigned control_data_format;
-
- bool include_primitive_id;
-
- int invocations;
-
- /**
- * Dispatch mode, can be any of:
- * GEN7_GS_DISPATCH_MODE_DUAL_OBJECT
- * GEN7_GS_DISPATCH_MODE_DUAL_INSTANCE
- * GEN7_GS_DISPATCH_MODE_SINGLE
- */
- int dispatch_mode;
-
- /**
- * Gen6 transform feedback enabled flag.
- */
- bool gen6_xfb_enabled;
-
- /**
- * Gen6: Provoking vertex convention for odd-numbered triangles
- * in tristrips.
- */
- GLuint pv_first:1;
-
- /**
- * Gen6: Number of varyings that are output to transform feedback.
- */
- GLuint num_transform_feedback_bindings:7; /* 0-BRW_MAX_SOL_BINDINGS */
-
- /**
- * Gen6: Map from the index of a transform feedback binding table entry to the
- * gl_varying_slot that should be streamed out through that binding table
- * entry.
- */
- unsigned char transform_feedback_bindings[BRW_MAX_SOL_BINDINGS];
-
- /**
- * Gen6: Map from the index of a transform feedback binding table entry to the
- * swizzles that should be used when streaming out data through that
- * binding table entry.
- */
- unsigned char transform_feedback_swizzles[BRW_MAX_SOL_BINDINGS];
-};
-
/**
* Stride in bytes between shader_time entries.
*
};
-typedef bool (*cache_aux_compare_func)(const void *a, const void *b);
-typedef void (*cache_aux_free_func)(const void *aux);
-
struct brw_cache {
struct brw_context *brw;
uint32_t next_offset;
bool bo_used_by_gpu;
-
- /**
- * Optional functions used in determining whether the prog_data for a new
- * cache item matches an existing cache item (in case there's relevant data
- * outside of the prog_data). If NULL, a plain memcmp is done.
- */
- cache_aux_compare_func aux_compare[BRW_MAX_CACHE];
- /** Optional functions for freeing other pointers attached to a prog_data. */
- cache_aux_free_func aux_free[BRW_MAX_CACHE];
};
enum shader_time_shader_type {
ST_NONE,
ST_VS,
- ST_VS_WRITTEN,
- ST_VS_RESET,
+ ST_TCS,
+ ST_TES,
ST_GS,
- ST_GS_WRITTEN,
- ST_GS_RESET,
ST_FS8,
- ST_FS8_WRITTEN,
- ST_FS8_RESET,
ST_FS16,
- ST_FS16_WRITTEN,
- ST_FS16_RESET,
+ ST_CS,
};
struct brw_vertex_buffer {
bool flushed;
};
-struct intel_sync_object {
- struct gl_sync_object Base;
-
- /** Batch associated with this sync object */
- drm_intel_bo *bo;
-};
-
enum brw_gpu_ring {
UNKNOWN_RING,
RENDER_RING,
drm_intel_bo *bo;
/** Last BO submitted to the hardware. Used for glFinish(). */
drm_intel_bo *last_bo;
- /** BO for post-sync nonzero writes for gen6 workaround. */
- drm_intel_bo *workaround_bo;
+#ifdef DEBUG
uint16_t emit, total;
- uint16_t used, reserved_space;
+#endif
+ uint16_t reserved_space;
+ uint32_t *map_next;
uint32_t *map;
uint32_t *cpu_map;
#define BATCH_SZ (8192*sizeof(uint32_t))
enum brw_gpu_ring ring;
bool needs_sol_reset;
- uint8_t pipe_controls_since_last_cs_stall;
-
struct {
- uint16_t used;
+ uint32_t *map_next;
int reloc_count;
} saved;
};
+#define MAX_GS_INPUT_VERTICES 6
+
#define BRW_MAX_XFB_STREAMS 4
struct brw_transform_feedback_object {
uint32_t sampler_offset;
};
+enum brw_predicate_state {
+ /* Describes how the draw / don't-draw decision for conditional rendering
+ * is reached.
+ *
+ * The first two states are used if we can determine whether to draw
+ * without having to look at the values in the query object buffer. This
+ * will happen if there is no conditional render in progress, if the query
+ * object is already completed, or if something else has already added
+ * samples to the preliminary result, such as via a BLT command.
+ */
+ BRW_PREDICATE_STATE_RENDER,
+ BRW_PREDICATE_STATE_DONT_RENDER,
+ /* In this case, whether to draw or not depends on the result of an
+ * MI_PREDICATE command, so the predicate enable bit needs to be checked.
+ */
+ BRW_PREDICATE_STATE_USE_BIT
+};
+
+struct shader_times;
+
+struct brw_l3_config;
/**
* brw_context is derived from gl_context.
unsigned unit,
uint32_t *surf_offset,
bool for_gather);
- void (*update_renderbuffer_surface)(struct brw_context *brw,
- struct gl_renderbuffer *rb,
- bool layered,
- unsigned unit);
-
+ uint32_t (*update_renderbuffer_surface)(struct brw_context *brw,
+ struct gl_renderbuffer *rb,
+ bool layered, unsigned unit,
+ uint32_t surf_index);
+
+ void (*emit_texture_surface_state)(struct brw_context *brw,
+ struct intel_mipmap_tree *mt,
+ GLenum target,
+ unsigned min_layer,
+ unsigned max_layer,
+ unsigned min_level,
+ unsigned max_level,
+ unsigned format,
+ unsigned swizzle,
+ uint32_t *surf_offset,
+ bool rw, bool for_gather);
void (*emit_buffer_surface_state)(struct brw_context *brw,
uint32_t *out_offset,
drm_intel_bo *bo,
drm_intel_context *hw_ctx;
+ /** BO for post-sync nonzero writes for gen6 workaround. */
+ drm_intel_bo *workaround_bo;
+ uint8_t pipe_controls_since_last_cs_stall;
+
/**
* Set of drm_intel_bo * that have been rendered to within this batchbuffer
* and would need flushing before being used from another cache domain that
* Number of resets observed in the system at context creation.
*
* This is tracked in the context so that we can determine that another
- * reset has occured.
+ * reset has occurred.
*/
uint32_t reset_count;
} upload;
/**
- * Set if rendering has occured to the drawable's front buffer.
+ * Set if rendering has occurred to the drawable's front buffer.
*
* This is used in the DRI2 case to detect that glFlush should also copy
* the contents of the fake front buffer to the real front buffer.
int gen;
int gt;
- /* GT revision. This will be -1 if the revision couldn't be determined (eg,
- * if the kernel doesn't support the query).
- */
- int revision;
bool is_g4x;
bool is_baytrail;
bool is_haswell;
bool is_cherryview;
+ bool is_broxton;
bool has_hiz;
bool has_separate_stencil;
bool has_pln;
bool no_simd8;
bool use_rep_send;
- bool scalar_vs;
+ bool use_resource_streamer;
+
+ /**
+ * Whether LRI can be used to write register values from the batch buffer.
+ */
+ bool can_do_pipelined_register_writes;
/**
* Some versions of Gen hardware don't do centroid interpolation correctly
GLuint NewGLState;
struct {
- struct brw_state_flags dirty;
struct brw_state_flags pipelines[BRW_NUM_PIPELINES];
} state;
+ enum brw_pipeline last_pipeline;
+
struct brw_cache cache;
/** IDs for meta stencil blit shader programs. */
uint32_t draw_params_offset;
} draw;
+ struct {
+ /**
+ * For gl_NumWorkGroups: If num_work_groups_bo is non-NULL, then it is
+ * an indirect call, and num_work_groups_offset is valid. Otherwise,
+ * num_work_groups is set based on glDispatchCompute.
+ */
+ drm_intel_bo *num_work_groups_bo;
+ GLintptr num_work_groups_offset;
+ const GLuint *num_work_groups;
+ } compute;
+
struct {
struct brw_vertex_element inputs[VERT_ATTRIB_MAX];
struct brw_vertex_buffer buffers[VERT_ATTRIB_MAX];
*/
const struct gl_vertex_program *vertex_program;
const struct gl_geometry_program *geometry_program;
+ const struct gl_tess_ctrl_program *tess_ctrl_program;
+ const struct gl_tess_eval_program *tess_eval_program;
const struct gl_fragment_program *fragment_program;
+ const struct gl_compute_program *compute_program;
/**
* Number of samples in ctx->DrawBuffer, updated by BRW_NEW_NUM_SAMPLES so
* Platform specific constants containing the maximum number of threads
* for each pipeline stage.
*/
- int max_vs_threads;
- int max_hs_threads;
- int max_ds_threads;
- int max_gs_threads;
- int max_wm_threads;
+ unsigned max_vs_threads;
+ unsigned max_hs_threads;
+ unsigned max_ds_threads;
+ unsigned max_gs_threads;
+ unsigned max_wm_threads;
+ unsigned max_cs_threads;
/* BRW_NEW_URB_ALLOCATIONS:
*/
GLuint clip_start;
GLuint sf_start;
GLuint cs_start;
- GLuint size; /* Hardware URB size, in KB. */
+ /**
+ * URB size in the current configuration. The units this is expressed
+ * in are somewhat inconsistent, see brw_device_info::urb::size.
+ *
+ * FINISHME: Represent the URB size consistently in KB on all platforms.
+ */
+ GLuint size;
/* True if the most recently sent _3DSTATE_URB message allocated
* URB space for the GS.
GLuint curbe_offset;
} curbe;
- /**
- * Layout of vertex data exiting the vertex shader.
- *
- * BRW_NEW_VUE_MAP_VS is flagged when this VUE map changes.
- */
- struct brw_vue_map vue_map_vs;
-
/**
* Layout of vertex data exiting the geometry portion of the pipleine.
- * This comes from the geometry shader if one exists, otherwise from the
- * vertex shader.
+ * This comes from the last enabled shader stage (GS, DS, or VS).
*
* BRW_NEW_VUE_MAP_GEOM_OUT is flagged when the VUE map changes.
*/
struct brw_vs_prog_data *prog_data;
} vs;
+ struct {
+ struct brw_stage_state base;
+ struct brw_tcs_prog_data *prog_data;
+
+ /**
+ * True if the 3DSTATE_HS command most recently emitted to the 3D
+ * pipeline enabled the HS; false otherwise.
+ */
+ bool enabled;
+ } tcs;
+
+ struct {
+ struct brw_stage_state base;
+ struct brw_tes_prog_data *prog_data;
+
+ /**
+ * True if the 3DSTATE_DS command most recently emitted to the 3D
+ * pipeline enabled the DS; false otherwise.
+ */
+ bool enabled;
+ } tes;
+
struct {
struct brw_stage_state base;
struct brw_gs_prog_data *prog_data;
*/
drm_intel_bo *multisampled_null_render_target_bo;
uint32_t fast_clear_op;
+
+ float offset_clamp;
} wm;
+ struct {
+ struct brw_stage_state base;
+ struct brw_cs_prog_data *prog_data;
+ } cs;
+
+ /* Resource streamer (RS) hardware binding table pool */
+ struct {
+ drm_intel_bo *bo;
+ uint32_t next_offset;
+ } hw_bt_pool;
struct {
uint32_t state_offset;
bool begin_emitted;
} query;
+ struct {
+ enum brw_predicate_state state;
+ bool supported;
+ } predicate;
+
struct {
/** A map from pipeline statistics counter IDs to MMIO addresses. */
const int *statistics_registers;
} perfmon;
int num_atoms[BRW_NUM_PIPELINES];
- const struct brw_tracked_state render_atoms[57];
- const struct brw_tracked_state compute_atoms[1];
+ const struct brw_tracked_state render_atoms[76];
+ const struct brw_tracked_state compute_atoms[10];
/* If (INTEL_DEBUG & DEBUG_BATCH) */
struct {
uint32_t offset;
uint32_t size;
enum aub_state_struct_type type;
+ int index;
} *state_batch_list;
int state_batch_count;
uint32_t num_instances;
int basevertex;
+ struct {
+ const struct brw_l3_config *config;
+ } l3;
+
struct {
drm_intel_bo *bo;
- struct gl_shader_program **shader_programs;
- struct gl_program **programs;
+ const char **names;
+ int *ids;
enum shader_time_shader_type *types;
- uint64_t *cumulative;
+ struct shader_times *cumulative;
int num_entries;
int max_entries;
double report_time;
void brw_store_register_mem64(struct brw_context *brw,
drm_intel_bo *bo, uint32_t reg, int idx);
+/** brw_conditional_render.c */
+void brw_init_conditional_render_functions(struct dd_function_table *functions);
+bool brw_check_conditional_render(struct brw_context *brw);
+
/** intel_batchbuffer.c */
void brw_load_register_mem(struct brw_context *brw,
uint32_t reg,
drm_intel_bo *bo,
uint32_t read_domains, uint32_t write_domain,
uint32_t offset);
+void brw_load_register_mem64(struct brw_context *brw,
+ uint32_t reg,
+ drm_intel_bo *bo,
+ uint32_t read_domains, uint32_t write_domain,
+ uint32_t offset);
/*======================================================================
* brw_state_dump.c
/*======================================================================
* brw_program.c
*/
+static inline bool
+key_debug(struct brw_context *brw, const char *name, int a, int b)
+{ /* Compare two integer values of the program-key field called `name`.
+ * When they differ, log the pair through perf_debug() in the form
+ * "<name> <a>-><b>" and return true; when they are equal, log nothing
+ * and return false.
+ *
+ * `brw` is not referenced by name in this body.
+ * NOTE(review): presumably perf_debug() is a macro that picks up `brw`
+ * from the enclosing scope -- confirm against its definition before
+ * dropping or renaming the parameter.
+ */
+ if (a != b) {
+ perf_debug(" %s %d->%d\n", name, a, b);
+ return true;
+ }
+ return false;
+}
+
void brwInitFragProgFuncs( struct dd_function_table *functions );
-int brw_get_scratch_size(int size);
+/* Per-thread scratch space is a power-of-two multiple of 1KB.
+ *
+ * Takes the requested size and returns util_next_power_of_two() of that
+ * request with its low ten bits forced on (size | 1023), so the value being
+ * rounded is never below 1023.
+ *
+ * NOTE(review): assuming util_next_power_of_two() rounds up to the nearest
+ * power of two, the result is at least 1024. A request that is already an
+ * exact power of two >= 1024 also has its low bits set (e.g. 1024 | 1023 =
+ * 2047) and would therefore come back doubled rather than unchanged --
+ * confirm whether that over-allocation is intended.
+ */
+static inline int
+brw_get_scratch_size(int size)
+{
+ return util_next_power_of_two(size | 1023);
+}
void brw_get_scratch_bo(struct brw_context *brw,
drm_intel_bo **scratch_bo, int size);
void brw_init_shader_time(struct brw_context *brw);
/* brw_fs_reg_allocate.cpp
*/
-void brw_fs_alloc_reg_sets(struct intel_screen *screen);
+void brw_fs_alloc_reg_sets(struct brw_compiler *compiler);
/* brw_vec4_reg_allocate.cpp */
-void brw_vec4_alloc_reg_set(struct intel_screen *screen);
+void brw_vec4_alloc_reg_set(struct brw_compiler *compiler);
/* brw_disasm.c */
-int brw_disassemble_inst(FILE *file, struct brw_context *brw,
+int brw_disassemble_inst(FILE *file, const struct brw_device_info *devinfo,
struct brw_inst *inst, bool is_compacted);
/* brw_vs.c */
drm_intel_bo *bo,
uint32_t offset,
uint32_t size,
- uint32_t *out_offset,
- bool dword_pitch);
+ uint32_t *out_offset);
+void brw_create_buffer_surface(struct brw_context *brw,
+ drm_intel_bo *bo,
+ uint32_t offset,
+ uint32_t size,
+ uint32_t *out_offset);
void brw_update_buffer_texture_surface(struct gl_context *ctx,
unsigned unit,
uint32_t *surf_offset);
void brw_upload_ubo_surfaces(struct brw_context *brw,
struct gl_shader *shader,
struct brw_stage_state *stage_state,
- struct brw_stage_prog_data *prog_data,
- bool dword_pitch);
+ struct brw_stage_prog_data *prog_data);
void brw_upload_abo_surfaces(struct brw_context *brw,
- struct gl_shader_program *prog,
+ struct gl_shader *shader,
struct brw_stage_state *stage_state,
struct brw_stage_prog_data *prog_data);
+void brw_upload_image_surfaces(struct brw_context *brw,
+ struct gl_shader *shader,
+ struct brw_stage_state *stage_state,
+ struct brw_stage_prog_data *prog_data);
/* brw_surface_formats.c */
-bool brw_is_hiz_depth_format(struct brw_context *ctx, mesa_format format);
bool brw_render_target_supported(struct brw_context *brw,
struct gl_renderbuffer *rb);
+bool brw_losslessly_compressible_format(struct brw_context *brw,
+ uint32_t brw_format);
uint32_t brw_depth_format(struct brw_context *brw, mesa_format format);
+mesa_format brw_lower_mesa_image_format(const struct brw_device_info *devinfo,
+ mesa_format format);
/* brw_performance_monitor.c */
void brw_init_performance_monitors(struct brw_context *brw);
extern GLenum
brw_get_graphics_reset_status(struct gl_context *ctx);
+/* brw_compute.c */
+extern void
+brw_init_compute_functions(struct dd_function_table *functions);
+
/*======================================================================
* Inline conversion functions. These are better-typed than the
* macros used previously:
return (const struct brw_fragment_program *) p;
}
+static inline struct brw_compute_program *
+brw_compute_program(struct gl_compute_program *p)
+{ /* Typed downcast helper: reinterprets the Mesa compute program pointer
+ * `p` as the driver's brw_compute_program and returns it. No checking
+ * or copying is done, so the result aliases `p`.
+ *
+ * NOTE(review): this is only valid if struct brw_compute_program has
+ * a gl_compute_program as its first member (the layout the other
+ * brw_*_program subclasses in this header use); its definition is not
+ * visible here -- confirm.
+ */
+ return (struct brw_compute_program *) p;
+}
+
/**
* Pre-gen6, the register file of the EUs was shared between threads,
* and each thread used some subset allocated on a 16-register block
extern const struct opcode_desc opcode_descs[128];
extern const char * const conditional_modifier[16];
+extern const char *const pred_ctrl_align16[16];
void
brw_emit_depthbuffer(struct brw_context *brw);
uint32_t get_hw_prim_for_gl_prim(int mode);
-void
-brw_setup_vue_key_clip_info(struct brw_context *brw,
- struct brw_vue_prog_key *key,
- bool program_uses_clip_distance);
-
void
gen6_upload_push_constants(struct brw_context *brw,
const struct gl_program *prog,
struct brw_stage_state *stage_state,
enum aub_state_struct_type type);
+bool
+gen9_use_linear_1d_layout(const struct brw_context *brw,
+ const struct intel_mipmap_tree *mt);
+
+/* brw_pipe_control.c */
+int brw_init_pipe_control(struct brw_context *brw,
+ const struct brw_device_info *info);
+void brw_fini_pipe_control(struct brw_context *brw);
+
+void brw_emit_pipe_control_flush(struct brw_context *brw, uint32_t flags);
+void brw_emit_pipe_control_write(struct brw_context *brw, uint32_t flags,
+ drm_intel_bo *bo, uint32_t offset,
+ uint32_t imm_lower, uint32_t imm_upper);
+void brw_emit_mi_flush(struct brw_context *brw);
+void brw_emit_post_sync_nonzero_flush(struct brw_context *brw);
+void brw_emit_depth_stall_flushes(struct brw_context *brw);
+void gen7_emit_vs_workaround_flush(struct brw_context *brw);
+void gen7_emit_cs_stall_flush(struct brw_context *brw);
+
#ifdef __cplusplus
}
#endif