X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fdrivers%2Fdri%2Fi965%2Fbrw_context.h;h=6b82bea52c02173ebd8cde84b693f3ebf0e1a58b;hb=7287cc844057e2242592df40409e31780f090ece;hp=3bae90dafaf000330f6dd19b611c880753c2997e;hpb=bfdc76c133bb9e91049824b7480f862f142e4195;p=mesa.git diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 3bae90dafaf..6b82bea52c0 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -34,14 +34,11 @@ #define BRWCONTEXT_INC #include -#include -#include "main/imports.h" #include "main/macros.h" -#include "main/mm.h" #include "main/mtypes.h" #include "brw_structs.h" +#include "brw_compiler.h" #include "intel_aub.h" -#include "program/prog_parameter.h" #ifdef __cplusplus extern "C" { @@ -49,9 +46,7 @@ extern "C" { #define virtual virt #endif -#include #include -#include #ifdef __cplusplus #undef virtual } @@ -115,6 +110,12 @@ extern "C" { * enabled, it first passes them to a VS thread which is a good place * for the driver to implement any active vertex shader. * + * HS - Hull Shader (Tessellation Control Shader) + * + * TE - Tessellation Engine (Tessellation Primitive Generation) + * + * DS - Domain Shader (Tessellation Evaluation Shader) + * * GS - Geometry Shader. This corresponds to a new DX10 concept. If * enabled, incoming strips etc are passed to GS threads in individual * line/triangle/point units. The GS thread may perform arbitary @@ -165,6 +166,8 @@ enum brw_cache_id { BRW_CACHE_VS_PROG, BRW_CACHE_FF_GS_PROG, BRW_CACHE_GS_PROG, + BRW_CACHE_TCS_PROG, + BRW_CACHE_TES_PROG, BRW_CACHE_CLIP_PROG, BRW_CACHE_CS_PROG, @@ -176,21 +179,24 @@ enum brw_state_id { BRW_STATE_URB_FENCE = BRW_MAX_CACHE, BRW_STATE_FRAGMENT_PROGRAM, BRW_STATE_GEOMETRY_PROGRAM, + BRW_STATE_TESS_PROGRAMS, BRW_STATE_VERTEX_PROGRAM, BRW_STATE_CURBE_OFFSETS, BRW_STATE_REDUCED_PRIMITIVE, + BRW_STATE_PATCH_PRIMITIVE, BRW_STATE_PRIMITIVE, BRW_STATE_CONTEXT, BRW_STATE_PSP, BRW_STATE_SURFACES, - BRW_STATE_VS_BINDING_TABLE, - BRW_STATE_GS_BINDING_TABLE, - BRW_STATE_PS_BINDING_TABLE, + BRW_STATE_BINDING_TABLE_POINTERS, BRW_STATE_INDICES, BRW_STATE_VERTICES, + BRW_STATE_DEFAULT_TESS_LEVELS, BRW_STATE_BATCH, BRW_STATE_INDEX_BUFFER, BRW_STATE_VS_CONSTBUF, + BRW_STATE_TCS_CONSTBUF, + BRW_STATE_TES_CONSTBUF, BRW_STATE_GS_CONSTBUF, BRW_STATE_PROGRAM_CACHE, BRW_STATE_STATE_BASE_ADDRESS, @@ -214,6 +220,8 @@ enum brw_state_id { BRW_STATE_VS_ATTRIB_WORKAROUNDS, BRW_STATE_COMPUTE_PROGRAM, BRW_STATE_CS_WORK_GROUPS, + BRW_STATE_URB_SIZE, + BRW_STATE_CC_STATE, BRW_NUM_STATE_BITS }; @@ -248,23 +256,26 @@ enum brw_state_id { #define BRW_NEW_VS_PROG_DATA (1ull << BRW_CACHE_VS_PROG) #define BRW_NEW_FF_GS_PROG_DATA (1ull << BRW_CACHE_FF_GS_PROG) #define BRW_NEW_GS_PROG_DATA (1ull << BRW_CACHE_GS_PROG) +#define BRW_NEW_TCS_PROG_DATA (1ull << BRW_CACHE_TCS_PROG) +#define BRW_NEW_TES_PROG_DATA (1ull << BRW_CACHE_TES_PROG) #define BRW_NEW_CLIP_PROG_DATA (1ull << BRW_CACHE_CLIP_PROG) #define BRW_NEW_CS_PROG_DATA (1ull << BRW_CACHE_CS_PROG) #define BRW_NEW_URB_FENCE (1ull << BRW_STATE_URB_FENCE) #define BRW_NEW_FRAGMENT_PROGRAM (1ull << BRW_STATE_FRAGMENT_PROGRAM) #define BRW_NEW_GEOMETRY_PROGRAM (1ull << BRW_STATE_GEOMETRY_PROGRAM) +#define BRW_NEW_TESS_PROGRAMS (1ull << BRW_STATE_TESS_PROGRAMS) #define BRW_NEW_VERTEX_PROGRAM (1ull << BRW_STATE_VERTEX_PROGRAM) #define BRW_NEW_CURBE_OFFSETS (1ull << BRW_STATE_CURBE_OFFSETS) #define BRW_NEW_REDUCED_PRIMITIVE (1ull << BRW_STATE_REDUCED_PRIMITIVE) +#define BRW_NEW_PATCH_PRIMITIVE (1ull << BRW_STATE_PATCH_PRIMITIVE) #define BRW_NEW_PRIMITIVE (1ull << BRW_STATE_PRIMITIVE) #define BRW_NEW_CONTEXT (1ull << BRW_STATE_CONTEXT) #define BRW_NEW_PSP (1ull << BRW_STATE_PSP) #define BRW_NEW_SURFACES (1ull << BRW_STATE_SURFACES) -#define BRW_NEW_VS_BINDING_TABLE (1ull << BRW_STATE_VS_BINDING_TABLE) -#define BRW_NEW_GS_BINDING_TABLE (1ull << BRW_STATE_GS_BINDING_TABLE) -#define BRW_NEW_PS_BINDING_TABLE (1ull << BRW_STATE_PS_BINDING_TABLE) +#define BRW_NEW_BINDING_TABLE_POINTERS (1ull << BRW_STATE_BINDING_TABLE_POINTERS) #define BRW_NEW_INDICES (1ull << BRW_STATE_INDICES) #define BRW_NEW_VERTICES (1ull << BRW_STATE_VERTICES) +#define BRW_NEW_DEFAULT_TESS_LEVELS (1ull << BRW_STATE_DEFAULT_TESS_LEVELS) /** * Used for any batch entry with a relocated pointer that will be used * by any 3D rendering. @@ -273,6 +284,8 @@ enum brw_state_id { /** \see brw.state.depth_region */ #define BRW_NEW_INDEX_BUFFER (1ull << BRW_STATE_INDEX_BUFFER) #define BRW_NEW_VS_CONSTBUF (1ull << BRW_STATE_VS_CONSTBUF) +#define BRW_NEW_TCS_CONSTBUF (1ull << BRW_STATE_TCS_CONSTBUF) +#define BRW_NEW_TES_CONSTBUF (1ull << BRW_STATE_TES_CONSTBUF) #define BRW_NEW_GS_CONSTBUF (1ull << BRW_STATE_GS_CONSTBUF) #define BRW_NEW_PROGRAM_CACHE (1ull << BRW_STATE_PROGRAM_CACHE) #define BRW_NEW_STATE_BASE_ADDRESS (1ull << BRW_STATE_STATE_BASE_ADDRESS) @@ -296,6 +309,8 @@ enum brw_state_id { #define BRW_NEW_VS_ATTRIB_WORKAROUNDS (1ull << BRW_STATE_VS_ATTRIB_WORKAROUNDS) #define BRW_NEW_COMPUTE_PROGRAM (1ull << BRW_STATE_COMPUTE_PROGRAM) #define BRW_NEW_CS_WORK_GROUPS (1ull << BRW_STATE_CS_WORK_GROUPS) +#define BRW_NEW_URB_SIZE (1ull << BRW_STATE_URB_SIZE) +#define BRW_NEW_CC_STATE (1ull << BRW_STATE_CC_STATE) struct brw_state_flags { /** State update flags signalled by mesa internals */ @@ -313,6 +328,20 @@ struct brw_vertex_program { }; +/** Subclass of Mesa tessellation control program */ +struct brw_tess_ctrl_program { + struct gl_tess_ctrl_program program; + unsigned id; /**< serial no. to identify tess ctrl progs, never re-used */ +}; + + +/** Subclass of Mesa tessellation evaluation program */ +struct brw_tess_eval_program { + struct gl_tess_eval_program program; + unsigned id; /**< serial no. to identify tess eval progs, never re-used */ +}; + + /** Subclass of Mesa geometry program */ struct brw_geometry_program { struct gl_geometry_program program; @@ -340,275 +369,6 @@ struct brw_shader { bool compiled_once; }; -/* Note: If adding fields that need anything besides a normal memcmp() for - * comparing them, be sure to go fix brw_stage_prog_data_compare(). - */ -struct brw_stage_prog_data { - struct { - /** size of our binding table. */ - uint32_t size_bytes; - - /** @{ - * surface indices for the various groups of surfaces - */ - uint32_t pull_constants_start; - uint32_t texture_start; - uint32_t gather_texture_start; - uint32_t ubo_start; - uint32_t abo_start; - uint32_t image_start; - uint32_t shader_time_start; - /** @} */ - } binding_table; - - GLuint nr_params; /**< number of float params/constants */ - GLuint nr_pull_params; - unsigned nr_image_params; - - unsigned curb_read_length; - unsigned total_scratch; - - /** - * Register where the thread expects to find input data from the URB - * (typically uniforms, followed by vertex or fragment attributes). - */ - unsigned dispatch_grf_start_reg; - - bool use_alt_mode; /**< Use ALT floating point mode? Otherwise, IEEE. */ - - /* Pointers to tracked values (only valid once - * _mesa_load_state_parameters has been called at runtime). - * - * These must be the last fields of the struct (see - * brw_stage_prog_data_compare()). - */ - const gl_constant_value **param; - const gl_constant_value **pull_param; - - /** - * Image metadata passed to the shader as uniforms. This is deliberately - * ignored by brw_stage_prog_data_compare() because its contents don't have - * any influence on program compilation. - */ - struct brw_image_param *image_param; -}; - -/* - * Image metadata structure as laid out in the shader parameter - * buffer. Entries have to be 16B-aligned for the vec4 back-end to be - * able to use them. That's okay because the padding and any unused - * entries [most of them except when we're doing untyped surface - * access] will be removed by the uniform packing pass. - */ -#define BRW_IMAGE_PARAM_SURFACE_IDX_OFFSET 0 -#define BRW_IMAGE_PARAM_OFFSET_OFFSET 4 -#define BRW_IMAGE_PARAM_SIZE_OFFSET 8 -#define BRW_IMAGE_PARAM_STRIDE_OFFSET 12 -#define BRW_IMAGE_PARAM_TILING_OFFSET 16 -#define BRW_IMAGE_PARAM_SWIZZLING_OFFSET 20 -#define BRW_IMAGE_PARAM_SIZE 24 - -struct brw_image_param { - /** Surface binding table index. */ - uint32_t surface_idx; - - /** Offset applied to the X and Y surface coordinates. */ - uint32_t offset[2]; - - /** Surface X, Y and Z dimensions. */ - uint32_t size[3]; - - /** X-stride in bytes, Y-stride in pixels, horizontal slice stride in - * pixels, vertical slice stride in pixels. - */ - uint32_t stride[4]; - - /** Log2 of the tiling modulus in the X, Y and Z dimension. */ - uint32_t tiling[3]; - - /** - * Right shift to apply for bit 6 address swizzling. Two different - * swizzles can be specified and will be applied one after the other. The - * resulting address will be: - * - * addr' = addr ^ ((1 << 6) & ((addr >> swizzling[0]) ^ - * (addr >> swizzling[1]))) - * - * Use \c 0xff if any of the swizzles is not required. - */ - uint32_t swizzling[2]; -}; - -/* Data about a particular attempt to compile a program. Note that - * there can be many of these, each in a different GL state - * corresponding to a different brw_wm_prog_key struct, with different - * compiled programs. - * - * Note: brw_wm_prog_data_compare() must be updated when adding fields to this - * struct! - */ -struct brw_wm_prog_data { - struct brw_stage_prog_data base; - - GLuint num_varying_inputs; - - GLuint dispatch_grf_start_reg_16; - GLuint reg_blocks; - GLuint reg_blocks_16; - - struct { - /** @{ - * surface indices the WM-specific surfaces - */ - uint32_t render_target_start; - /** @} */ - } binding_table; - - uint8_t computed_depth_mode; - - bool early_fragment_tests; - bool no_8; - bool dual_src_blend; - bool uses_pos_offset; - bool uses_omask; - bool uses_kill; - bool pulls_bary; - uint32_t prog_offset_16; - - /** - * Mask of which interpolation modes are required by the fragment shader. - * Used in hardware setup on gen6+. - */ - uint32_t barycentric_interp_modes; - - /** - * Map from gl_varying_slot to the position within the FS setup data - * payload where the varying's attribute vertex deltas should be delivered. - * For varying slots that are not used by the FS, the value is -1. - */ - int urb_setup[VARYING_SLOT_MAX]; -}; - -/* Note: brw_cs_prog_data_compare() must be updated when adding fields to this - * struct! - */ -struct brw_cs_prog_data { - struct brw_stage_prog_data base; - - GLuint dispatch_grf_start_reg_16; - unsigned local_size[3]; - unsigned simd_size; - bool uses_barrier; - bool uses_num_work_groups; - - struct { - /** @{ - * surface indices the CS-specific surfaces - */ - uint32_t work_groups_start; - /** @} */ - } binding_table; -}; - -/** - * Enum representing the i965-specific vertex results that don't correspond - * exactly to any element of gl_varying_slot. The values of this enum are - * assigned such that they don't conflict with gl_varying_slot. - */ -typedef enum -{ - BRW_VARYING_SLOT_NDC = VARYING_SLOT_MAX, - BRW_VARYING_SLOT_PAD, - /** - * Technically this is not a varying but just a placeholder that - * compile_sf_prog() inserts into its VUE map to cause the gl_PointCoord - * builtin variable to be compiled correctly. see compile_sf_prog() for - * more info. - */ - BRW_VARYING_SLOT_PNTC, - BRW_VARYING_SLOT_COUNT -} brw_varying_slot; - - -/** - * Data structure recording the relationship between the gl_varying_slot enum - * and "slots" within the vertex URB entry (VUE). A "slot" is defined as a - * single octaword within the VUE (128 bits). - * - * Note that each BRW register contains 256 bits (2 octawords), so when - * accessing the VUE in URB_NOSWIZZLE mode, each register corresponds to two - * consecutive VUE slots. When accessing the VUE in URB_INTERLEAVED mode (as - * in a vertex shader), each register corresponds to a single VUE slot, since - * it contains data for two separate vertices. - */ -struct brw_vue_map { - /** - * Bitfield representing all varying slots that are (a) stored in this VUE - * map, and (b) actually written by the shader. Does not include any of - * the additional varying slots defined in brw_varying_slot. - */ - GLbitfield64 slots_valid; - - /** - * Is this VUE map for a separate shader pipeline? - * - * Separable programs (GL_ARB_separate_shader_objects) can be mixed and matched - * without the linker having a chance to dead code eliminate unused varyings. - * - * This means that we have to use a fixed slot layout, based on the output's - * location field, rather than assigning slots in a compact contiguous block. - */ - bool separate; - - /** - * Map from gl_varying_slot value to VUE slot. For gl_varying_slots that are - * not stored in a slot (because they are not written, or because - * additional processing is applied before storing them in the VUE), the - * value is -1. - */ - signed char varying_to_slot[BRW_VARYING_SLOT_COUNT]; - - /** - * Map from VUE slot to gl_varying_slot value. For slots that do not - * directly correspond to a gl_varying_slot, the value comes from - * brw_varying_slot. - * - * For slots that are not in use, the value is BRW_VARYING_SLOT_COUNT (this - * simplifies code that uses the value stored in slot_to_varying to - * create a bit mask). - */ - signed char slot_to_varying[BRW_VARYING_SLOT_COUNT]; - - /** - * Total number of VUE slots in use - */ - int num_slots; -}; - -/** - * Convert a VUE slot number into a byte offset within the VUE. - */ -static inline GLuint brw_vue_slot_to_offset(GLuint slot) -{ - return 16*slot; -} - -/** - * Convert a vertex output (brw_varying_slot) into a byte offset within the - * VUE. - */ -static inline GLuint brw_varying_to_offset(struct brw_vue_map *vue_map, - GLuint varying) -{ - return brw_vue_slot_to_offset(vue_map->varying_to_slot[varying]); -} - -void brw_compute_vue_map(const struct brw_device_info *devinfo, - struct brw_vue_map *vue_map, - GLbitfield64 slots_valid, - bool separate_shader); - - /** * Bitmask indicating which fragment shader inputs represent varyings (and * hence have to be delivered to the fragment shader by the SF/SBE stage). @@ -685,51 +445,18 @@ struct brw_ff_gs_prog_data { unsigned svbi_postincrement_value; }; -enum shader_dispatch_mode { - DISPATCH_MODE_4X1_SINGLE = 0, - DISPATCH_MODE_4X2_DUAL_INSTANCE = 1, - DISPATCH_MODE_4X2_DUAL_OBJECT = 2, - DISPATCH_MODE_SIMD8 = 3, -}; - -/* Note: brw_vue_prog_data_compare() must be updated when adding fields to - * this struct! - */ -struct brw_vue_prog_data { - struct brw_stage_prog_data base; - struct brw_vue_map vue_map; - - GLuint urb_read_length; - GLuint total_grf; - - /* Used for calculating urb partitions. In the VS, this is the size of the - * URB entry used for both input and output to the thread. In the GS, this - * is the size of the URB entry used for output. - */ - GLuint urb_entry_size; - - enum shader_dispatch_mode dispatch_mode; -}; - - -/* Note: brw_vs_prog_data_compare() must be updated when adding fields to this - * struct! - */ -struct brw_vs_prog_data { - struct brw_vue_prog_data base; - - GLbitfield64 inputs_read; - - bool uses_vertexid; - bool uses_instanceid; -}; - /** Number of texture sampler units */ #define BRW_MAX_TEX_UNIT 32 /** Max number of render targets in a shader */ #define BRW_MAX_DRAW_BUFFERS 8 +/** Max number of UBOs in a shader */ +#define BRW_MAX_UBO 14 + +/** Max number of SSBOs in a shader */ +#define BRW_MAX_SSBO 12 + /** Max number of atomic counter buffer objects in a shader */ #define BRW_MAX_ABO 16 @@ -766,7 +493,8 @@ struct brw_vs_prog_data { #define BRW_MAX_SURFACES (BRW_MAX_DRAW_BUFFERS + \ BRW_MAX_TEX_UNIT * 2 + /* normal, gather */ \ - 12 + /* ubo */ \ + BRW_MAX_UBO + \ + BRW_MAX_SSBO + \ BRW_MAX_ABO + \ BRW_MAX_IMAGES + \ 2 + /* shader time, pull constants */ \ @@ -774,74 +502,6 @@ struct brw_vs_prog_data { #define SURF_INDEX_GEN6_SOL_BINDING(t) (t) -/* Note: brw_gs_prog_data_compare() must be updated when adding fields to - * this struct! - */ -struct brw_gs_prog_data -{ - struct brw_vue_prog_data base; - - /** - * Size of an output vertex, measured in HWORDS (32 bytes). - */ - unsigned output_vertex_size_hwords; - - unsigned output_topology; - - /** - * Size of the control data (cut bits or StreamID bits), in hwords (32 - * bytes). 0 if there is no control data. - */ - unsigned control_data_header_size_hwords; - - /** - * Format of the control data (either GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID - * if the control data is StreamID bits, or - * GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_CUT if the control data is cut bits). - * Ignored if control_data_header_size is 0. - */ - unsigned control_data_format; - - bool include_primitive_id; - - /** - * The number of vertices emitted, if constant - otherwise -1. - */ - int static_vertex_count; - - int invocations; - - /** - * Gen6 transform feedback enabled flag. - */ - bool gen6_xfb_enabled; - - /** - * Gen6: Provoking vertex convention for odd-numbered triangles - * in tristrips. - */ - GLuint pv_first:1; - - /** - * Gen6: Number of varyings that are output to transform feedback. - */ - GLuint num_transform_feedback_bindings:7; /* 0-BRW_MAX_SOL_BINDINGS */ - - /** - * Gen6: Map from the index of a transform feedback binding table entry to the - * gl_varying_slot that should be streamed out through that binding table - * entry. - */ - unsigned char transform_feedback_bindings[BRW_MAX_SOL_BINDINGS]; - - /** - * Gen6: Map from the index of a transform feedback binding table entry to the - * swizzles that should be used when streaming out data through that - * binding table entry. - */ - unsigned char transform_feedback_swizzles[BRW_MAX_SOL_BINDINGS]; -}; - /** * Stride in bytes between shader_time entries. * @@ -869,8 +529,6 @@ struct brw_cache_item { }; -typedef void (*cache_aux_free_func)(const void *aux); - struct brw_cache { struct brw_context *brw; @@ -880,9 +538,6 @@ struct brw_cache { uint32_t next_offset; bool bo_used_by_gpu; - - /** Optional functions for freeing other pointers attached to a prog_data. */ - cache_aux_free_func aux_free[BRW_MAX_CACHE]; }; @@ -900,6 +555,8 @@ struct brw_tracked_state { enum shader_time_shader_type { ST_NONE, ST_VS, + ST_TCS, + ST_TES, ST_GS, ST_FS8, ST_FS16, @@ -967,6 +624,8 @@ struct intel_batchbuffer { } saved; }; +#define MAX_GS_INPUT_VERTICES 6 + #define BRW_MAX_XFB_STREAMS 4 struct brw_transform_feedback_object { @@ -1049,6 +708,8 @@ enum brw_predicate_state { struct shader_times; +struct brw_l3_config; + /** * brw_context is derived from gl_context. */ @@ -1175,6 +836,7 @@ struct brw_context bool always_flush_cache; bool disable_throttling; bool precompile; + bool dual_color_blend_by_location; driOptionCache optionCache; /** @} */ @@ -1214,6 +876,11 @@ struct brw_context bool use_rep_send; bool use_resource_streamer; + /** + * Whether LRI can be used to write register values from the batch buffer. + */ + bool can_do_pipelined_register_writes; + /** * Some versions of Gen hardware don't do centroid interpolation correctly * on unlit pixels, causing incorrect values for derivatives near triangle @@ -1245,8 +912,13 @@ struct brw_context uint32_t pma_stall_bits; struct { - /** The value of gl_BaseVertex for the current _mesa_prim. */ - int gl_basevertex; + struct { + /** The value of gl_BaseVertex for the current _mesa_prim. */ + int gl_basevertex; + + /** The value of gl_BaseInstance for the current _mesa_prim. */ + int gl_baseinstance; + } params; /** * Buffer and offset used for GL_ARB_shader_draw_parameters @@ -1254,6 +926,15 @@ struct brw_context */ drm_intel_bo *draw_params_bo; uint32_t draw_params_offset; + + /** + * The value of gl_DrawID for the current _mesa_prim. This always comes + * in from it's own vertex buffer since it's not part of the indirect + * draw parameters. + */ + int gl_drawid; + drm_intel_bo *draw_id_bo; + uint32_t draw_id_offset; } draw; struct { @@ -1317,6 +998,8 @@ struct brw_context */ const struct gl_vertex_program *vertex_program; const struct gl_geometry_program *geometry_program; + const struct gl_tess_ctrl_program *tess_ctrl_program; + const struct gl_tess_eval_program *tess_eval_program; const struct gl_fragment_program *fragment_program; const struct gl_compute_program *compute_program; @@ -1342,6 +1025,8 @@ struct brw_context struct { GLuint vsize; /* vertex size plus header in urb registers */ GLuint gsize; /* GS output size in urb registers */ + GLuint hsize; /* Tessellation control output size in urb registers */ + GLuint dsize; /* Tessellation evaluation output size in urb registers */ GLuint csize; /* constant buffer size in urb registers */ GLuint sfsize; /* setup data size in urb registers */ @@ -1354,22 +1039,37 @@ struct brw_context GLuint max_gs_entries; /* Maximum number of GS entries */ GLuint nr_vs_entries; + GLuint nr_hs_entries; + GLuint nr_ds_entries; GLuint nr_gs_entries; GLuint nr_clip_entries; GLuint nr_sf_entries; GLuint nr_cs_entries; GLuint vs_start; + GLuint hs_start; + GLuint ds_start; GLuint gs_start; GLuint clip_start; GLuint sf_start; GLuint cs_start; - GLuint size; /* Hardware URB size, in KB. */ + /** + * URB size in the current configuration. The units this is expressed + * in are somewhat inconsistent, see brw_device_info::urb::size. + * + * FINISHME: Represent the URB size consistently in KB on all platforms. + */ + GLuint size; /* True if the most recently sent _3DSTATE_URB message allocated * URB space for the GS. */ bool gs_present; + + /* True if the most recently sent _3DSTATE_URB message allocated + * URB space for the HS and DS. + */ + bool tess_present; } urb; @@ -1406,6 +1106,28 @@ struct brw_context struct brw_vs_prog_data *prog_data; } vs; + struct { + struct brw_stage_state base; + struct brw_tcs_prog_data *prog_data; + + /** + * True if the 3DSTATE_HS command most recently emitted to the 3D + * pipeline enabled the HS; false otherwise. + */ + bool enabled; + } tcs; + + struct { + struct brw_stage_state base; + struct brw_tes_prog_data *prog_data; + + /** + * True if the 3DSTATE_DS command most recently emitted to the 3D + * pipeline enabled the DS; false otherwise. + */ + bool enabled; + } tes; + struct { struct brw_stage_state base; struct brw_gs_prog_data *prog_data; @@ -1472,6 +1194,8 @@ struct brw_context */ drm_intel_bo *multisampled_null_render_target_bo; uint32_t fast_clear_op; + + float offset_clamp; } wm; struct { @@ -1540,8 +1264,8 @@ struct brw_context } perfmon; int num_atoms[BRW_NUM_PIPELINES]; - const struct brw_tracked_state render_atoms[60]; - const struct brw_tracked_state compute_atoms[7]; + const struct brw_tracked_state render_atoms[76]; + const struct brw_tracked_state compute_atoms[11]; /* If (INTEL_DEBUG & DEBUG_BATCH) */ struct { @@ -1583,6 +1307,10 @@ struct brw_context uint32_t num_instances; int basevertex; + struct { + const struct brw_l3_config *config; + } l3; + struct { drm_intel_bo *bo; const char **names; @@ -1640,9 +1368,10 @@ GLboolean brwCreateContext(gl_api api, /*====================================================================== * brw_misc_state.c */ -GLuint brw_get_rb_for_slice(struct brw_context *brw, - struct intel_mipmap_tree *mt, - unsigned level, unsigned layer, bool flat); +struct gl_renderbuffer *brw_get_rb_for_slice(struct brw_context *brw, + struct intel_mipmap_tree *mt, + unsigned level, unsigned layer, + bool flat); void brw_meta_updownsample(struct brw_context *brw, struct intel_mipmap_tree *src, @@ -1733,9 +1462,24 @@ void brw_validate_textures( struct brw_context *brw ); /*====================================================================== * brw_program.c */ +static inline bool +key_debug(struct brw_context *brw, const char *name, int a, int b) +{ + if (a != b) { + perf_debug(" %s %d->%d\n", name, a, b); + return true; + } + return false; +} + void brwInitFragProgFuncs( struct dd_function_table *functions ); -int brw_get_scratch_size(int size); +/* Per-thread scratch space is a power-of-two multiple of 1KB. */ +static inline int +brw_get_scratch_size(int size) +{ + return util_next_power_of_two(size | 1023); +} void brw_get_scratch_bo(struct brw_context *brw, drm_intel_bo **scratch_bo, int size); void brw_init_shader_time(struct brw_context *brw); @@ -1800,14 +1544,12 @@ void brw_create_constant_surface(struct brw_context *brw, drm_intel_bo *bo, uint32_t offset, uint32_t size, - uint32_t *out_offset, - bool dword_pitch); + uint32_t *out_offset); void brw_create_buffer_surface(struct brw_context *brw, drm_intel_bo *bo, uint32_t offset, uint32_t size, - uint32_t *out_offset, - bool dword_pitch); + uint32_t *out_offset); void brw_update_buffer_texture_surface(struct gl_context *ctx, unsigned unit, uint32_t *surf_offset); @@ -1819,10 +1561,9 @@ brw_update_sol_surface(struct brw_context *brw, void brw_upload_ubo_surfaces(struct brw_context *brw, struct gl_shader *shader, struct brw_stage_state *stage_state, - struct brw_stage_prog_data *prog_data, - bool dword_pitch); + struct brw_stage_prog_data *prog_data); void brw_upload_abo_surfaces(struct brw_context *brw, - struct gl_shader_program *prog, + struct gl_shader *shader, struct brw_stage_state *stage_state, struct brw_stage_prog_data *prog_data); void brw_upload_image_surfaces(struct brw_context *brw, @@ -1833,6 +1574,8 @@ void brw_upload_image_surfaces(struct brw_context *brw, /* brw_surface_formats.c */ bool brw_render_target_supported(struct brw_context *brw, struct gl_renderbuffer *rb); +bool brw_losslessly_compressible_format(const struct brw_context *brw, + uint32_t brw_format); uint32_t brw_depth_format(struct brw_context *brw, mesa_format format); mesa_format brw_lower_mesa_image_format(const struct brw_device_info *devinfo, mesa_format format); @@ -1934,12 +1677,18 @@ void gen8_emit_3dstate_sample_pattern(struct brw_context *brw); /* gen7_urb.c */ void gen7_emit_push_constant_state(struct brw_context *brw, unsigned vs_size, + unsigned hs_size, unsigned ds_size, unsigned gs_size, unsigned fs_size); void gen7_emit_urb_state(struct brw_context *brw, - unsigned nr_vs_entries, unsigned vs_size, - unsigned vs_start, unsigned nr_gs_entries, + unsigned nr_vs_entries, + unsigned vs_size, unsigned vs_start, + unsigned nr_hs_entries, + unsigned hs_size, unsigned hs_start, + unsigned nr_ds_entries, + unsigned ds_size, unsigned ds_start, + unsigned nr_gs_entries, unsigned gs_size, unsigned gs_start); @@ -1973,6 +1722,18 @@ brw_vertex_program_const(const struct gl_vertex_program *p) return (const struct brw_vertex_program *) p; } +static inline struct brw_tess_ctrl_program * +brw_tess_ctrl_program(struct gl_tess_ctrl_program *p) +{ + return (struct brw_tess_ctrl_program *) p; +} + +static inline struct brw_tess_eval_program * +brw_tess_eval_program(struct gl_tess_eval_program *p) +{ + return (struct brw_tess_eval_program *) p; +} + static inline struct brw_geometry_program * brw_geometry_program(struct gl_geometry_program *p) { @@ -2039,6 +1800,7 @@ struct opcode_desc { extern const struct opcode_desc opcode_descs[128]; extern const char * const conditional_modifier[16]; +extern const char *const pred_ctrl_align16[16]; void brw_emit_depthbuffer(struct brw_context *brw);