X-Git-Url: https://git.libre-soc.org/?p=mesa.git;a=blobdiff_plain;f=src%2Fintel%2Fcompiler%2Fbrw_compiler.h;h=ccbb5cc640903e87ab68642868250ae87cf183bf;hp=614410e3fb7c4f3ea4e711cbfe72cf8bf101e0f0;hb=6c11a7994dd8937035f4f9e49b63dd18b015bce6;hpb=2afedfaf9aa161f8e8acbd1e8048a540db5fcfc8 diff --git a/src/intel/compiler/brw_compiler.h b/src/intel/compiler/brw_compiler.h index 614410e3fb7..ccbb5cc6409 100644 --- a/src/intel/compiler/brw_compiler.h +++ b/src/intel/compiler/brw_compiler.h @@ -83,10 +83,10 @@ struct brw_compiler { uint8_t *ra_reg_to_grf; /** - * ra class for the aligned pairs we use for PLN, which doesn't + * ra class for the aligned barycentrics we use for PLN, which doesn't * appear in *classes. */ - int aligned_pairs_class; + int aligned_bary_class; } fs_reg_sets[3]; void (*shader_debug_log)(void *, const char *str, ...) PRINTFLIKE(2, 3); @@ -119,6 +119,18 @@ struct brw_compiler { * whether nir_opt_large_constants will be run. */ bool supports_shader_constants; + + /** + * Whether or not the driver wants uniform params to be compacted by the + * back-end compiler. + */ + bool compact_params; + + /** + * Whether or not the driver wants variable group size to be lowered by the + * back-end compiler. + */ + bool lower_variable_group_size; }; /** @@ -198,6 +210,8 @@ struct brw_sampler_prog_key_data { uint32_t xy_uxvx_image_mask; uint32_t ayuv_image_mask; uint32_t xyuv_image_mask; + uint32_t bt709_mask; + uint32_t bt2020_mask; /* Scale factor for each texture. */ float scale_factors[32]; @@ -445,6 +459,7 @@ struct brw_wm_prog_key { bool high_quality_derivatives:1; bool force_dual_color_blend:1; bool coherent_fb_fetch:1; + bool ignore_sample_mask_out:1; uint8_t color_outputs_valid; uint64_t input_slots_valid; @@ -609,6 +624,9 @@ enum brw_param_builtin { BRW_PARAM_BUILTIN_BASE_WORK_GROUP_ID_Y, BRW_PARAM_BUILTIN_BASE_WORK_GROUP_ID_Z, BRW_PARAM_BUILTIN_SUBGROUP_ID, + BRW_PARAM_BUILTIN_WORK_GROUP_SIZE_X, + BRW_PARAM_BUILTIN_WORK_GROUP_SIZE_Y, + BRW_PARAM_BUILTIN_WORK_GROUP_SIZE_Z, }; #define BRW_PARAM_BUILTIN_CLIP_PLANE(idx, comp) \ @@ -648,12 +666,28 @@ struct brw_stage_prog_data { GLuint nr_params; /**< number of float params/constants */ GLuint nr_pull_params; + /* zero_push_reg is a bitfield which indicates what push registers (if any) + * should be zeroed by SW at the start of the shader. The corresponding + * push_reg_mask_param specifies the param index (in 32-bit units) where + * the actual runtime 64-bit mask will be pushed. The shader will zero + * push reg i if + * + * reg_used & zero_push_reg & ~*push_reg_mask_param & (1ull << i) + * + * If this field is set, brw_compiler::compact_params must be false. + */ + uint64_t zero_push_reg; + unsigned push_reg_mask_param; + unsigned curb_read_length; unsigned total_scratch; unsigned total_shared; unsigned program_size; + /** Does this program pull from any UBO or other constant buffers? */ + bool has_ubo_pull; + /** * Register where the thread expects to find input data from the URB * (typically uniforms, followed by vertex or fragment attributes). @@ -670,6 +704,9 @@ struct brw_stage_prog_data { */ uint32_t *param; uint32_t *pull_param; + + /* Whether shader uses atomic operations. */ + bool uses_atomic_load_store; }; static inline uint32_t * @@ -742,7 +779,6 @@ struct brw_wm_prog_data { bool dispatch_16; bool dispatch_32; bool dual_src_blend; - bool replicate_alpha; bool persample_dispatch; bool uses_pos_offset; bool uses_omask; @@ -769,6 +805,11 @@ struct brw_wm_prog_data { */ uint32_t flat_inputs; + /** + * The FS inputs + */ + uint64_t inputs; + /* Mapping of VUE slots to interpolation modes. * Used by the Gen4-5 clip/sf/wm stages. */ @@ -780,6 +821,14 @@ struct brw_wm_prog_data { * For varying slots that are not used by the FS, the value is -1. */ int urb_setup[VARYING_SLOT_MAX]; + + /** + * Cache structure into the urb_setup array above that contains the + * attribute numbers of active varyings out of urb_setup. + * The actual count is stored in urb_setup_attribs_count. + */ + uint8_t urb_setup_attribs[VARYING_SLOT_MAX]; + uint8_t urb_setup_attribs_count; }; /** Returns the SIMD width corresponding to a given KSP index @@ -877,15 +926,26 @@ struct brw_cs_prog_data { struct brw_stage_prog_data base; unsigned local_size[3]; - unsigned simd_size; - unsigned threads; + unsigned slm_size; + + /* Program offsets for the 8/16/32 SIMD variants. Multiple variants are + * kept when using variable group size, and the right one can only be + * decided at dispatch time. + */ + unsigned prog_offset[3]; + + /* Bitmask indicating which program offsets are valid. */ + unsigned prog_mask; + + /* Bitmask indicating which programs have spilled. */ + unsigned prog_spilled; + bool uses_barrier; bool uses_num_work_groups; struct { struct brw_push_const_block cross_thread; struct brw_push_const_block per_thread; - struct brw_push_const_block total; } push; struct { @@ -897,6 +957,18 @@ struct brw_cs_prog_data { } binding_table; }; +static inline uint32_t +brw_cs_prog_data_prog_offset(const struct brw_cs_prog_data *prog_data, + unsigned dispatch_width) +{ + assert(dispatch_width == 8 || + dispatch_width == 16 || + dispatch_width == 32); + const unsigned index = dispatch_width / 16; + assert(prog_data->prog_mask & (1 << index)); + return prog_data->prog_offset[index]; +} + /** * Enum representing the i965-specific vertex results that don't correspond * exactly to any element of gl_varying_slot. The values of this enum are @@ -1020,7 +1092,8 @@ GLuint brw_varying_to_offset(const struct brw_vue_map *vue_map, GLuint varying) void brw_compute_vue_map(const struct gen_device_info *devinfo, struct brw_vue_map *vue_map, uint64_t slots_valid, - bool separate_shader); + bool separate_shader, + uint32_t pos_slots); void brw_compute_tess_vue_map(struct brw_vue_map *const vue_map, uint64_t slots_valid, @@ -1116,6 +1189,9 @@ struct brw_tcs_prog_data /** Number vertices in output patch */ int instances; + + /** Track patch count threshold */ + int patch_count_threshold; }; @@ -1222,11 +1298,16 @@ union brw_any_prog_data { struct brw_cs_prog_data cs; }; -#define DEFINE_PROG_DATA_DOWNCAST(stage) \ -static inline struct brw_##stage##_prog_data * \ -brw_##stage##_prog_data(struct brw_stage_prog_data *prog_data) \ -{ \ - return (struct brw_##stage##_prog_data *) prog_data; \ +#define DEFINE_PROG_DATA_DOWNCAST(stage) \ +static inline struct brw_##stage##_prog_data * \ +brw_##stage##_prog_data(struct brw_stage_prog_data *prog_data) \ +{ \ + return (struct brw_##stage##_prog_data *) prog_data; \ +} \ +static inline const struct brw_##stage##_prog_data * \ +brw_##stage##_prog_data_const(const struct brw_stage_prog_data *prog_data) \ +{ \ + return (const struct brw_##stage##_prog_data *) prog_data; \ } DEFINE_PROG_DATA_DOWNCAST(vue) DEFINE_PROG_DATA_DOWNCAST(vs) @@ -1240,6 +1321,16 @@ DEFINE_PROG_DATA_DOWNCAST(clip) DEFINE_PROG_DATA_DOWNCAST(sf) #undef DEFINE_PROG_DATA_DOWNCAST +struct brw_compile_stats { + uint32_t dispatch_width; /**< 0 for vec4 */ + uint32_t instructions; + uint32_t sends; + uint32_t loops; + uint32_t cycles; + uint32_t spills; + uint32_t fills; +}; + /** @} */ struct brw_compiler * @@ -1278,6 +1369,7 @@ brw_compile_vs(const struct brw_compiler *compiler, void *log_data, struct brw_vs_prog_data *prog_data, struct nir_shader *shader, int shader_time_index, + struct brw_compile_stats *stats, char **error_str); /** @@ -1293,6 +1385,7 @@ brw_compile_tcs(const struct brw_compiler *compiler, struct brw_tcs_prog_data *prog_data, struct nir_shader *nir, int shader_time_index, + struct brw_compile_stats *stats, char **error_str); /** @@ -1307,8 +1400,8 @@ brw_compile_tes(const struct brw_compiler *compiler, void *log_data, const struct brw_vue_map *input_vue_map, struct brw_tes_prog_data *prog_data, struct nir_shader *shader, - struct gl_program *prog, int shader_time_index, + struct brw_compile_stats *stats, char **error_str); /** @@ -1324,6 +1417,7 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data, struct nir_shader *shader, struct gl_program *prog, int shader_time_index, + struct brw_compile_stats *stats, char **error_str); /** @@ -1369,12 +1463,12 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data, const struct brw_wm_prog_key *key, struct brw_wm_prog_data *prog_data, struct nir_shader *shader, - struct gl_program *prog, int shader_time_index8, int shader_time_index16, int shader_time_index32, bool allow_spilling, bool use_rep_send, struct brw_vue_map *vue_map, + struct brw_compile_stats *stats, /**< Array of three stats */ char **error_str); /** @@ -1389,6 +1483,7 @@ brw_compile_cs(const struct brw_compiler *compiler, void *log_data, struct brw_cs_prog_data *prog_data, const struct nir_shader *shader, int shader_time_index, + struct brw_compile_stats *stats, char **error_str); void brw_debug_key_recompile(const struct brw_compiler *c, void *log, @@ -1428,6 +1523,28 @@ encode_slm_size(unsigned gen, uint32_t bytes) return slm_size; } +unsigned +brw_cs_push_const_total_size(const struct brw_cs_prog_data *cs_prog_data, + unsigned threads); + +unsigned +brw_cs_simd_size_for_group_size(const struct gen_device_info *devinfo, + const struct brw_cs_prog_data *cs_prog_data, + unsigned group_size); + +/** + * Calculate the RightExecutionMask field used in GPGPU_WALKER. + */ +static inline unsigned +brw_cs_right_mask(unsigned group_size, unsigned simd_size) +{ + const uint32_t remainder = group_size & (simd_size - 1); + if (remainder > 0) + return ~0u >> (32 - remainder); + else + return ~0u >> (32 - simd_size); +} + /** * Return true if the given shader stage is dispatched contiguously by the * relevant fixed function starting from channel 0 of the SIMD thread, which @@ -1444,7 +1561,7 @@ brw_stage_has_packed_dispatch(ASSERTED const struct gen_device_info *devinfo, * to do a full test run with brw_fs_test_dispatch_packing() hooked up to * the NIR front-end before changing this assertion. */ - assert(devinfo->gen <= 11); + assert(devinfo->gen <= 12); switch (stage) { case MESA_SHADER_FRAGMENT: {