#define BRWCONTEXT_INC
#include <stdbool.h>
-#include <string.h>
-#include "main/imports.h"
#include "main/macros.h"
-#include "main/mm.h"
#include "main/mtypes.h"
#include "brw_structs.h"
+#include "brw_compiler.h"
#include "intel_aub.h"
-#include "program/prog_parameter.h"
#ifdef __cplusplus
extern "C" {
#define virtual virt
#endif
-#include <drm.h>
#include <intel_bufmgr.h>
-#include <i915_drm.h>
#ifdef __cplusplus
#undef virtual
}
* enabled, it first passes them to a VS thread which is a good place
* for the driver to implement any active vertex shader.
*
+ * HS - Hull Shader (Tessellation Control Shader)
+ *
+ * TE - Tessellation Engine (Tessellation Primitive Generation)
+ *
+ * DS - Domain Shader (Tessellation Evaluation Shader)
+ *
* GS - Geometry Shader. This corresponds to a new DX10 concept. If
* enabled, incoming strips etc are passed to GS threads in individual
* line/triangle/point units. The GS thread may perform arbitary
BRW_CACHE_VS_PROG,
BRW_CACHE_FF_GS_PROG,
BRW_CACHE_GS_PROG,
+ BRW_CACHE_TCS_PROG,
+ BRW_CACHE_TES_PROG,
BRW_CACHE_CLIP_PROG,
BRW_CACHE_CS_PROG,
BRW_STATE_URB_FENCE = BRW_MAX_CACHE,
BRW_STATE_FRAGMENT_PROGRAM,
BRW_STATE_GEOMETRY_PROGRAM,
+ BRW_STATE_TESS_PROGRAMS,
BRW_STATE_VERTEX_PROGRAM,
BRW_STATE_CURBE_OFFSETS,
BRW_STATE_REDUCED_PRIMITIVE,
+ BRW_STATE_PATCH_PRIMITIVE,
BRW_STATE_PRIMITIVE,
BRW_STATE_CONTEXT,
BRW_STATE_PSP,
BRW_STATE_SURFACES,
- BRW_STATE_VS_BINDING_TABLE,
- BRW_STATE_GS_BINDING_TABLE,
- BRW_STATE_PS_BINDING_TABLE,
+ BRW_STATE_BINDING_TABLE_POINTERS,
BRW_STATE_INDICES,
BRW_STATE_VERTICES,
+ BRW_STATE_DEFAULT_TESS_LEVELS,
BRW_STATE_BATCH,
BRW_STATE_INDEX_BUFFER,
BRW_STATE_VS_CONSTBUF,
+ BRW_STATE_TCS_CONSTBUF,
+ BRW_STATE_TES_CONSTBUF,
BRW_STATE_GS_CONSTBUF,
BRW_STATE_PROGRAM_CACHE,
BRW_STATE_STATE_BASE_ADDRESS,
- BRW_STATE_VUE_MAP_VS,
BRW_STATE_VUE_MAP_GEOM_OUT,
BRW_STATE_TRANSFORM_FEEDBACK,
BRW_STATE_RASTERIZER_DISCARD,
BRW_STATE_STATS_WM,
BRW_STATE_UNIFORM_BUFFER,
BRW_STATE_ATOMIC_BUFFER,
+ BRW_STATE_IMAGE_UNITS,
BRW_STATE_META_IN_PROGRESS,
BRW_STATE_INTERPOLATION_MAP,
BRW_STATE_PUSH_CONSTANT_ALLOCATION,
BRW_STATE_SAMPLER_STATE_TABLE,
BRW_STATE_VS_ATTRIB_WORKAROUNDS,
BRW_STATE_COMPUTE_PROGRAM,
+ BRW_STATE_CS_WORK_GROUPS,
+ BRW_STATE_URB_SIZE,
+ BRW_STATE_CC_STATE,
BRW_NUM_STATE_BITS
};
#define BRW_NEW_VS_PROG_DATA (1ull << BRW_CACHE_VS_PROG)
#define BRW_NEW_FF_GS_PROG_DATA (1ull << BRW_CACHE_FF_GS_PROG)
#define BRW_NEW_GS_PROG_DATA (1ull << BRW_CACHE_GS_PROG)
+#define BRW_NEW_TCS_PROG_DATA (1ull << BRW_CACHE_TCS_PROG)
+#define BRW_NEW_TES_PROG_DATA (1ull << BRW_CACHE_TES_PROG)
#define BRW_NEW_CLIP_PROG_DATA (1ull << BRW_CACHE_CLIP_PROG)
#define BRW_NEW_CS_PROG_DATA (1ull << BRW_CACHE_CS_PROG)
#define BRW_NEW_URB_FENCE (1ull << BRW_STATE_URB_FENCE)
#define BRW_NEW_FRAGMENT_PROGRAM (1ull << BRW_STATE_FRAGMENT_PROGRAM)
#define BRW_NEW_GEOMETRY_PROGRAM (1ull << BRW_STATE_GEOMETRY_PROGRAM)
+#define BRW_NEW_TESS_PROGRAMS (1ull << BRW_STATE_TESS_PROGRAMS)
#define BRW_NEW_VERTEX_PROGRAM (1ull << BRW_STATE_VERTEX_PROGRAM)
#define BRW_NEW_CURBE_OFFSETS (1ull << BRW_STATE_CURBE_OFFSETS)
#define BRW_NEW_REDUCED_PRIMITIVE (1ull << BRW_STATE_REDUCED_PRIMITIVE)
+#define BRW_NEW_PATCH_PRIMITIVE (1ull << BRW_STATE_PATCH_PRIMITIVE)
#define BRW_NEW_PRIMITIVE (1ull << BRW_STATE_PRIMITIVE)
#define BRW_NEW_CONTEXT (1ull << BRW_STATE_CONTEXT)
#define BRW_NEW_PSP (1ull << BRW_STATE_PSP)
#define BRW_NEW_SURFACES (1ull << BRW_STATE_SURFACES)
-#define BRW_NEW_VS_BINDING_TABLE (1ull << BRW_STATE_VS_BINDING_TABLE)
-#define BRW_NEW_GS_BINDING_TABLE (1ull << BRW_STATE_GS_BINDING_TABLE)
-#define BRW_NEW_PS_BINDING_TABLE (1ull << BRW_STATE_PS_BINDING_TABLE)
+#define BRW_NEW_BINDING_TABLE_POINTERS (1ull << BRW_STATE_BINDING_TABLE_POINTERS)
#define BRW_NEW_INDICES (1ull << BRW_STATE_INDICES)
#define BRW_NEW_VERTICES (1ull << BRW_STATE_VERTICES)
+#define BRW_NEW_DEFAULT_TESS_LEVELS (1ull << BRW_STATE_DEFAULT_TESS_LEVELS)
/**
* Used for any batch entry with a relocated pointer that will be used
* by any 3D rendering.
/** \see brw.state.depth_region */
#define BRW_NEW_INDEX_BUFFER (1ull << BRW_STATE_INDEX_BUFFER)
#define BRW_NEW_VS_CONSTBUF (1ull << BRW_STATE_VS_CONSTBUF)
+#define BRW_NEW_TCS_CONSTBUF (1ull << BRW_STATE_TCS_CONSTBUF)
+#define BRW_NEW_TES_CONSTBUF (1ull << BRW_STATE_TES_CONSTBUF)
#define BRW_NEW_GS_CONSTBUF (1ull << BRW_STATE_GS_CONSTBUF)
#define BRW_NEW_PROGRAM_CACHE (1ull << BRW_STATE_PROGRAM_CACHE)
#define BRW_NEW_STATE_BASE_ADDRESS (1ull << BRW_STATE_STATE_BASE_ADDRESS)
-#define BRW_NEW_VUE_MAP_VS (1ull << BRW_STATE_VUE_MAP_VS)
#define BRW_NEW_VUE_MAP_GEOM_OUT (1ull << BRW_STATE_VUE_MAP_GEOM_OUT)
#define BRW_NEW_TRANSFORM_FEEDBACK (1ull << BRW_STATE_TRANSFORM_FEEDBACK)
#define BRW_NEW_RASTERIZER_DISCARD (1ull << BRW_STATE_RASTERIZER_DISCARD)
#define BRW_NEW_STATS_WM (1ull << BRW_STATE_STATS_WM)
#define BRW_NEW_UNIFORM_BUFFER (1ull << BRW_STATE_UNIFORM_BUFFER)
#define BRW_NEW_ATOMIC_BUFFER (1ull << BRW_STATE_ATOMIC_BUFFER)
+#define BRW_NEW_IMAGE_UNITS (1ull << BRW_STATE_IMAGE_UNITS)
#define BRW_NEW_META_IN_PROGRESS (1ull << BRW_STATE_META_IN_PROGRESS)
#define BRW_NEW_INTERPOLATION_MAP (1ull << BRW_STATE_INTERPOLATION_MAP)
#define BRW_NEW_PUSH_CONSTANT_ALLOCATION (1ull << BRW_STATE_PUSH_CONSTANT_ALLOCATION)
#define BRW_NEW_SAMPLER_STATE_TABLE (1ull << BRW_STATE_SAMPLER_STATE_TABLE)
#define BRW_NEW_VS_ATTRIB_WORKAROUNDS (1ull << BRW_STATE_VS_ATTRIB_WORKAROUNDS)
#define BRW_NEW_COMPUTE_PROGRAM (1ull << BRW_STATE_COMPUTE_PROGRAM)
+#define BRW_NEW_CS_WORK_GROUPS (1ull << BRW_STATE_CS_WORK_GROUPS)
+#define BRW_NEW_URB_SIZE (1ull << BRW_STATE_URB_SIZE)
+#define BRW_NEW_CC_STATE (1ull << BRW_STATE_CC_STATE)
struct brw_state_flags {
/** State update flags signalled by mesa internals */
};
+/** Subclass of Mesa tessellation control program */
+struct brw_tess_ctrl_program {
+ struct gl_tess_ctrl_program program;
+ unsigned id; /**< serial no. to identify tess ctrl progs, never re-used */
+};
+
+
+/** Subclass of Mesa tessellation evaluation program */
+struct brw_tess_eval_program {
+ struct gl_tess_eval_program program;
+ unsigned id; /**< serial no. to identify tess eval progs, never re-used */
+};
+
+
/** Subclass of Mesa geometry program */
struct brw_geometry_program {
struct gl_geometry_program program;
bool compiled_once;
};
-/* Note: If adding fields that need anything besides a normal memcmp() for
- * comparing them, be sure to go fix brw_stage_prog_data_compare().
- */
-struct brw_stage_prog_data {
- struct {
- /** size of our binding table. */
- uint32_t size_bytes;
-
- /** @{
- * surface indices for the various groups of surfaces
- */
- uint32_t pull_constants_start;
- uint32_t texture_start;
- uint32_t gather_texture_start;
- uint32_t ubo_start;
- uint32_t abo_start;
- uint32_t image_start;
- uint32_t shader_time_start;
- /** @} */
- } binding_table;
-
- GLuint nr_params; /**< number of float params/constants */
- GLuint nr_pull_params;
-
- unsigned curb_read_length;
- unsigned total_scratch;
-
- /**
- * Register where the thread expects to find input data from the URB
- * (typically uniforms, followed by vertex or fragment attributes).
- */
- unsigned dispatch_grf_start_reg;
-
- bool use_alt_mode; /**< Use ALT floating point mode? Otherwise, IEEE. */
-
- /* Pointers to tracked values (only valid once
- * _mesa_load_state_parameters has been called at runtime).
- *
- * These must be the last fields of the struct (see
- * brw_stage_prog_data_compare()).
- */
- const gl_constant_value **param;
- const gl_constant_value **pull_param;
-};
-
-/* Data about a particular attempt to compile a program. Note that
- * there can be many of these, each in a different GL state
- * corresponding to a different brw_wm_prog_key struct, with different
- * compiled programs.
- *
- * Note: brw_wm_prog_data_compare() must be updated when adding fields to this
- * struct!
- */
-struct brw_wm_prog_data {
- struct brw_stage_prog_data base;
-
- GLuint num_varying_inputs;
-
- GLuint dispatch_grf_start_reg_16;
- GLuint reg_blocks;
- GLuint reg_blocks_16;
-
- struct {
- /** @{
- * surface indices the WM-specific surfaces
- */
- uint32_t render_target_start;
- /** @} */
- } binding_table;
-
- uint8_t computed_depth_mode;
-
- bool no_8;
- bool dual_src_blend;
- bool uses_pos_offset;
- bool uses_omask;
- bool uses_kill;
- bool pulls_bary;
- uint32_t prog_offset_16;
-
- /**
- * Mask of which interpolation modes are required by the fragment shader.
- * Used in hardware setup on gen6+.
- */
- uint32_t barycentric_interp_modes;
-
- /**
- * Map from gl_varying_slot to the position within the FS setup data
- * payload where the varying's attribute vertex deltas should be delivered.
- * For varying slots that are not used by the FS, the value is -1.
- */
- int urb_setup[VARYING_SLOT_MAX];
-};
-
-/* Note: brw_cs_prog_data_compare() must be updated when adding fields to this
- * struct!
- */
-struct brw_cs_prog_data {
- struct brw_stage_prog_data base;
-
- GLuint dispatch_grf_start_reg_16;
- unsigned local_size[3];
- unsigned simd_size;
-};
-
-/**
- * Enum representing the i965-specific vertex results that don't correspond
- * exactly to any element of gl_varying_slot. The values of this enum are
- * assigned such that they don't conflict with gl_varying_slot.
- */
-typedef enum
-{
- BRW_VARYING_SLOT_NDC = VARYING_SLOT_MAX,
- BRW_VARYING_SLOT_PAD,
- /**
- * Technically this is not a varying but just a placeholder that
- * compile_sf_prog() inserts into its VUE map to cause the gl_PointCoord
- * builtin variable to be compiled correctly. see compile_sf_prog() for
- * more info.
- */
- BRW_VARYING_SLOT_PNTC,
- BRW_VARYING_SLOT_COUNT
-} brw_varying_slot;
-
-
-/**
- * Data structure recording the relationship between the gl_varying_slot enum
- * and "slots" within the vertex URB entry (VUE). A "slot" is defined as a
- * single octaword within the VUE (128 bits).
- *
- * Note that each BRW register contains 256 bits (2 octawords), so when
- * accessing the VUE in URB_NOSWIZZLE mode, each register corresponds to two
- * consecutive VUE slots. When accessing the VUE in URB_INTERLEAVED mode (as
- * in a vertex shader), each register corresponds to a single VUE slot, since
- * it contains data for two separate vertices.
- */
-struct brw_vue_map {
- /**
- * Bitfield representing all varying slots that are (a) stored in this VUE
- * map, and (b) actually written by the shader. Does not include any of
- * the additional varying slots defined in brw_varying_slot.
- */
- GLbitfield64 slots_valid;
-
- /**
- * Map from gl_varying_slot value to VUE slot. For gl_varying_slots that are
- * not stored in a slot (because they are not written, or because
- * additional processing is applied before storing them in the VUE), the
- * value is -1.
- */
- signed char varying_to_slot[BRW_VARYING_SLOT_COUNT];
-
- /**
- * Map from VUE slot to gl_varying_slot value. For slots that do not
- * directly correspond to a gl_varying_slot, the value comes from
- * brw_varying_slot.
- *
- * For slots that are not in use, the value is BRW_VARYING_SLOT_COUNT (this
- * simplifies code that uses the value stored in slot_to_varying to
- * create a bit mask).
- */
- signed char slot_to_varying[BRW_VARYING_SLOT_COUNT];
-
- /**
- * Total number of VUE slots in use
- */
- int num_slots;
-};
-
-/**
- * Convert a VUE slot number into a byte offset within the VUE.
- */
-static inline GLuint brw_vue_slot_to_offset(GLuint slot)
-{
- return 16*slot;
-}
-
-/**
- * Convert a vertex output (brw_varying_slot) into a byte offset within the
- * VUE.
- */
-static inline GLuint brw_varying_to_offset(struct brw_vue_map *vue_map,
- GLuint varying)
-{
- return brw_vue_slot_to_offset(vue_map->varying_to_slot[varying]);
-}
-
-void brw_compute_vue_map(const struct brw_device_info *devinfo,
- struct brw_vue_map *vue_map,
- GLbitfield64 slots_valid);
-
-
/**
* Bitmask indicating which fragment shader inputs represent varyings (and
* hence have to be delivered to the fragment shader by the SF/SBE stage).
unsigned svbi_postincrement_value;
};
-enum shader_dispatch_mode {
- DISPATCH_MODE_4X1_SINGLE = 0,
- DISPATCH_MODE_4X2_DUAL_INSTANCE = 1,
- DISPATCH_MODE_4X2_DUAL_OBJECT = 2,
- DISPATCH_MODE_SIMD8 = 3,
-};
-
-/* Note: brw_vue_prog_data_compare() must be updated when adding fields to
- * this struct!
- */
-struct brw_vue_prog_data {
- struct brw_stage_prog_data base;
- struct brw_vue_map vue_map;
-
- GLuint urb_read_length;
- GLuint total_grf;
-
- /* Used for calculating urb partitions. In the VS, this is the size of the
- * URB entry used for both input and output to the thread. In the GS, this
- * is the size of the URB entry used for output.
- */
- GLuint urb_entry_size;
-
- enum shader_dispatch_mode dispatch_mode;
-};
-
-
-/* Note: brw_vs_prog_data_compare() must be updated when adding fields to this
- * struct!
- */
-struct brw_vs_prog_data {
- struct brw_vue_prog_data base;
-
- GLbitfield64 inputs_read;
-
- bool uses_vertexid;
- bool uses_instanceid;
-};
-
/** Number of texture sampler units */
#define BRW_MAX_TEX_UNIT 32
/** Max number of render targets in a shader */
#define BRW_MAX_DRAW_BUFFERS 8
+/** Max number of UBOs in a shader */
+#define BRW_MAX_UBO 14
+
+/** Max number of SSBOs in a shader */
+#define BRW_MAX_SSBO 12
+
/** Max number of atomic counter buffer objects in a shader */
#define BRW_MAX_ABO 16
#define BRW_MAX_SURFACES (BRW_MAX_DRAW_BUFFERS + \
BRW_MAX_TEX_UNIT * 2 + /* normal, gather */ \
- 12 + /* ubo */ \
+ BRW_MAX_UBO + \
+ BRW_MAX_SSBO + \
BRW_MAX_ABO + \
BRW_MAX_IMAGES + \
- 2 /* shader time, pull constants */)
+ 2 + /* shader time, pull constants */ \
+ 1 /* cs num work groups */)
#define SURF_INDEX_GEN6_SOL_BINDING(t) (t)
-/* Note: brw_gs_prog_data_compare() must be updated when adding fields to
- * this struct!
- */
-struct brw_gs_prog_data
-{
- struct brw_vue_prog_data base;
-
- /**
- * Size of an output vertex, measured in HWORDS (32 bytes).
- */
- unsigned output_vertex_size_hwords;
-
- unsigned output_topology;
-
- /**
- * Size of the control data (cut bits or StreamID bits), in hwords (32
- * bytes). 0 if there is no control data.
- */
- unsigned control_data_header_size_hwords;
-
- /**
- * Format of the control data (either GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID
- * if the control data is StreamID bits, or
- * GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_CUT if the control data is cut bits).
- * Ignored if control_data_header_size is 0.
- */
- unsigned control_data_format;
-
- bool include_primitive_id;
-
- int invocations;
-
- /**
- * Gen6 transform feedback enabled flag.
- */
- bool gen6_xfb_enabled;
-
- /**
- * Gen6: Provoking vertex convention for odd-numbered triangles
- * in tristrips.
- */
- GLuint pv_first:1;
-
- /**
- * Gen6: Number of varyings that are output to transform feedback.
- */
- GLuint num_transform_feedback_bindings:7; /* 0-BRW_MAX_SOL_BINDINGS */
-
- /**
- * Gen6: Map from the index of a transform feedback binding table entry to the
- * gl_varying_slot that should be streamed out through that binding table
- * entry.
- */
- unsigned char transform_feedback_bindings[BRW_MAX_SOL_BINDINGS];
-
- /**
- * Gen6: Map from the index of a transform feedback binding table entry to the
- * swizzles that should be used when streaming out data through that
- * binding table entry.
- */
- unsigned char transform_feedback_swizzles[BRW_MAX_SOL_BINDINGS];
-};
-
/**
* Stride in bytes between shader_time entries.
*
};
-typedef bool (*cache_aux_compare_func)(const void *a, const void *b);
-typedef void (*cache_aux_free_func)(const void *aux);
-
struct brw_cache {
struct brw_context *brw;
uint32_t next_offset;
bool bo_used_by_gpu;
-
- /**
- * Optional functions used in determining whether the prog_data for a new
- * cache item matches an existing cache item (in case there's relevant data
- * outside of the prog_data). If NULL, a plain memcmp is done.
- */
- cache_aux_compare_func aux_compare[BRW_MAX_CACHE];
- /** Optional functions for freeing other pointers attached to a prog_data. */
- cache_aux_free_func aux_free[BRW_MAX_CACHE];
};
enum shader_time_shader_type {
ST_NONE,
ST_VS,
+ ST_TCS,
+ ST_TES,
ST_GS,
ST_FS8,
ST_FS16,
#ifdef DEBUG
uint16_t emit, total;
#endif
- uint16_t used, reserved_space;
+ uint16_t reserved_space;
+ uint32_t *map_next;
uint32_t *map;
uint32_t *cpu_map;
#define BATCH_SZ (8192*sizeof(uint32_t))
bool needs_sol_reset;
struct {
- uint16_t used;
+ uint32_t *map_next;
int reloc_count;
} saved;
};
+#define MAX_GS_INPUT_VERTICES 6
+
#define BRW_MAX_XFB_STREAMS 4
struct brw_transform_feedback_object {
struct shader_times;
+struct brw_l3_config;
+
/**
* brw_context is derived from gl_context.
*/
bool always_flush_cache;
bool disable_throttling;
bool precompile;
+ bool dual_color_blend_by_location;
driOptionCache optionCache;
/** @} */
bool has_pln;
bool no_simd8;
bool use_rep_send;
+ bool use_resource_streamer;
+
+ /**
+ * Whether LRI can be used to write register values from the batch buffer.
+ */
+ bool can_do_pipelined_register_writes;
/**
* Some versions of Gen hardware don't do centroid interpolation correctly
uint32_t pma_stall_bits;
struct {
- /** The value of gl_BaseVertex for the current _mesa_prim. */
- int gl_basevertex;
+ struct {
+ /** The value of gl_BaseVertex for the current _mesa_prim. */
+ int gl_basevertex;
+
+ /** The value of gl_BaseInstance for the current _mesa_prim. */
+ int gl_baseinstance;
+ } params;
/**
* Buffer and offset used for GL_ARB_shader_draw_parameters
*/
drm_intel_bo *draw_params_bo;
uint32_t draw_params_offset;
+
+ /**
+ * The value of gl_DrawID for the current _mesa_prim. This always comes
+ * in from it's own vertex buffer since it's not part of the indirect
+ * draw parameters.
+ */
+ int gl_drawid;
+ drm_intel_bo *draw_id_bo;
+ uint32_t draw_id_offset;
} draw;
+ struct {
+ /**
+ * For gl_NumWorkGroups: If num_work_groups_bo is non NULL, then it is
+ * an indirect call, and num_work_groups_offset is valid. Otherwise,
+ * num_work_groups is set based on glDispatchCompute.
+ */
+ drm_intel_bo *num_work_groups_bo;
+ GLintptr num_work_groups_offset;
+ const GLuint *num_work_groups;
+ } compute;
+
struct {
struct brw_vertex_element inputs[VERT_ATTRIB_MAX];
struct brw_vertex_buffer buffers[VERT_ATTRIB_MAX];
*/
const struct gl_vertex_program *vertex_program;
const struct gl_geometry_program *geometry_program;
+ const struct gl_tess_ctrl_program *tess_ctrl_program;
+ const struct gl_tess_eval_program *tess_eval_program;
const struct gl_fragment_program *fragment_program;
const struct gl_compute_program *compute_program;
* Platform specific constants containing the maximum number of threads
* for each pipeline stage.
*/
- int max_vs_threads;
- int max_hs_threads;
- int max_ds_threads;
- int max_gs_threads;
- int max_wm_threads;
- int max_cs_threads;
+ unsigned max_vs_threads;
+ unsigned max_hs_threads;
+ unsigned max_ds_threads;
+ unsigned max_gs_threads;
+ unsigned max_wm_threads;
+ unsigned max_cs_threads;
/* BRW_NEW_URB_ALLOCATIONS:
*/
struct {
GLuint vsize; /* vertex size plus header in urb registers */
GLuint gsize; /* GS output size in urb registers */
+ GLuint hsize; /* Tessellation control output size in urb registers */
+ GLuint dsize; /* Tessellation evaluation output size in urb registers */
GLuint csize; /* constant buffer size in urb registers */
GLuint sfsize; /* setup data size in urb registers */
GLuint max_gs_entries; /* Maximum number of GS entries */
GLuint nr_vs_entries;
+ GLuint nr_hs_entries;
+ GLuint nr_ds_entries;
GLuint nr_gs_entries;
GLuint nr_clip_entries;
GLuint nr_sf_entries;
GLuint nr_cs_entries;
GLuint vs_start;
+ GLuint hs_start;
+ GLuint ds_start;
GLuint gs_start;
GLuint clip_start;
GLuint sf_start;
GLuint cs_start;
- GLuint size; /* Hardware URB size, in KB. */
+ /**
+ * URB size in the current configuration. The units this is expressed
+ * in are somewhat inconsistent, see brw_device_info::urb::size.
+ *
+ * FINISHME: Represent the URB size consistently in KB on all platforms.
+ */
+ GLuint size;
/* True if the most recently sent _3DSTATE_URB message allocated
* URB space for the GS.
*/
bool gs_present;
+
+ /* True if the most recently sent _3DSTATE_URB message allocated
+ * URB space for the HS and DS.
+ */
+ bool tess_present;
} urb;
GLuint curbe_offset;
} curbe;
- /**
- * Layout of vertex data exiting the vertex shader.
- *
- * BRW_NEW_VUE_MAP_VS is flagged when this VUE map changes.
- */
- struct brw_vue_map vue_map_vs;
-
/**
* Layout of vertex data exiting the geometry portion of the pipleine.
- * This comes from the geometry shader if one exists, otherwise from the
- * vertex shader.
+ * This comes from the last enabled shader stage (GS, DS, or VS).
*
* BRW_NEW_VUE_MAP_GEOM_OUT is flagged when the VUE map changes.
*/
struct brw_vs_prog_data *prog_data;
} vs;
+ struct {
+ struct brw_stage_state base;
+ struct brw_tcs_prog_data *prog_data;
+
+ /**
+ * True if the 3DSTATE_HS command most recently emitted to the 3D
+ * pipeline enabled the HS; false otherwise.
+ */
+ bool enabled;
+ } tcs;
+
+ struct {
+ struct brw_stage_state base;
+ struct brw_tes_prog_data *prog_data;
+
+ /**
+ * True if the 3DSTATE_DS command most recently emitted to the 3D
+ * pipeline enabled the DS; false otherwise.
+ */
+ bool enabled;
+ } tes;
+
struct {
struct brw_stage_state base;
struct brw_gs_prog_data *prog_data;
*/
drm_intel_bo *multisampled_null_render_target_bo;
uint32_t fast_clear_op;
+
+ float offset_clamp;
} wm;
struct {
struct brw_cs_prog_data *prog_data;
} cs;
+ /* RS hardware binding table */
+ struct {
+ drm_intel_bo *bo;
+ uint32_t next_offset;
+ } hw_bt_pool;
+
struct {
uint32_t state_offset;
uint32_t blend_state_offset;
} perfmon;
int num_atoms[BRW_NUM_PIPELINES];
- const struct brw_tracked_state render_atoms[57];
- const struct brw_tracked_state compute_atoms[3];
+ const struct brw_tracked_state render_atoms[76];
+ const struct brw_tracked_state compute_atoms[11];
/* If (INTEL_DEBUG & DEBUG_BATCH) */
struct {
uint32_t num_instances;
int basevertex;
+ struct {
+ const struct brw_l3_config *config;
+ } l3;
+
struct {
drm_intel_bo *bo;
const char **names;
/*======================================================================
* brw_program.c
*/
+static inline bool
+key_debug(struct brw_context *brw, const char *name, int a, int b)
+{
+ if (a != b) {
+ perf_debug(" %s %d->%d\n", name, a, b);
+ return true;
+ }
+ return false;
+}
+
void brwInitFragProgFuncs( struct dd_function_table *functions );
-int brw_get_scratch_size(int size);
+/* Per-thread scratch space is a power-of-two multiple of 1KB. */
+static inline int
+brw_get_scratch_size(int size)
+{
+ return util_next_power_of_two(size | 1023);
+}
void brw_get_scratch_bo(struct brw_context *brw,
drm_intel_bo **scratch_bo, int size);
void brw_init_shader_time(struct brw_context *brw);
drm_intel_bo *bo,
uint32_t offset,
uint32_t size,
- uint32_t *out_offset,
- bool dword_pitch);
+ uint32_t *out_offset);
+void brw_create_buffer_surface(struct brw_context *brw,
+ drm_intel_bo *bo,
+ uint32_t offset,
+ uint32_t size,
+ uint32_t *out_offset);
void brw_update_buffer_texture_surface(struct gl_context *ctx,
unsigned unit,
uint32_t *surf_offset);
void brw_upload_ubo_surfaces(struct brw_context *brw,
struct gl_shader *shader,
struct brw_stage_state *stage_state,
- struct brw_stage_prog_data *prog_data,
- bool dword_pitch);
+ struct brw_stage_prog_data *prog_data);
void brw_upload_abo_surfaces(struct brw_context *brw,
- struct gl_shader_program *prog,
+ struct gl_shader *shader,
struct brw_stage_state *stage_state,
struct brw_stage_prog_data *prog_data);
+void brw_upload_image_surfaces(struct brw_context *brw,
+ struct gl_shader *shader,
+ struct brw_stage_state *stage_state,
+ struct brw_stage_prog_data *prog_data);
/* brw_surface_formats.c */
bool brw_render_target_supported(struct brw_context *brw,
struct gl_renderbuffer *rb);
+bool brw_losslessly_compressible_format(const struct brw_context *brw,
+ uint32_t brw_format);
uint32_t brw_depth_format(struct brw_context *brw, mesa_format format);
+mesa_format brw_lower_mesa_image_format(const struct brw_device_info *devinfo,
+ mesa_format format);
/* brw_performance_monitor.c */
void brw_init_performance_monitors(struct brw_context *brw);
/* gen7_urb.c */
void
gen7_emit_push_constant_state(struct brw_context *brw, unsigned vs_size,
+ unsigned hs_size, unsigned ds_size,
unsigned gs_size, unsigned fs_size);
void
gen7_emit_urb_state(struct brw_context *brw,
- unsigned nr_vs_entries, unsigned vs_size,
- unsigned vs_start, unsigned nr_gs_entries,
+ unsigned nr_vs_entries,
+ unsigned vs_size, unsigned vs_start,
+ unsigned nr_hs_entries,
+ unsigned hs_size, unsigned hs_start,
+ unsigned nr_ds_entries,
+ unsigned ds_size, unsigned ds_start,
+ unsigned nr_gs_entries,
unsigned gs_size, unsigned gs_start);
return (const struct brw_vertex_program *) p;
}
+static inline struct brw_tess_ctrl_program *
+brw_tess_ctrl_program(struct gl_tess_ctrl_program *p)
+{
+ return (struct brw_tess_ctrl_program *) p;
+}
+
+static inline struct brw_tess_eval_program *
+brw_tess_eval_program(struct gl_tess_eval_program *p)
+{
+ return (struct brw_tess_eval_program *) p;
+}
+
static inline struct brw_geometry_program *
brw_geometry_program(struct gl_geometry_program *p)
{
extern const struct opcode_desc opcode_descs[128];
extern const char * const conditional_modifier[16];
+extern const char *const pred_ctrl_align16[16];
void
brw_emit_depthbuffer(struct brw_context *brw);
uint32_t get_hw_prim_for_gl_prim(int mode);
-void
-brw_setup_vue_key_clip_info(struct brw_context *brw,
- struct brw_vue_prog_key *key,
- bool program_uses_clip_distance);
-
void
gen6_upload_push_constants(struct brw_context *brw,
const struct gl_program *prog,