X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fintel%2Fcompiler%2Fbrw_fs.h;h=0843f6e73fc5b2b79785c501e7bccc26b2763e89;hb=90b6745bc80cf6dabb8f736dbf12d47c2a6602f5;hp=680bdc535ac1900b527eb1f36da8df6864911208;hpb=2fca325ea65f068043d4c18c9cd0fe7f25bde8f7;p=mesa.git diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h index 680bdc535ac..0843f6e73fc 100644 --- a/src/intel/compiler/brw_fs.h +++ b/src/intel/compiler/brw_fs.h @@ -31,6 +31,8 @@ #include "brw_shader.h" #include "brw_ir_fs.h" #include "brw_fs_builder.h" +#include "brw_fs_live_variables.h" +#include "brw_ir_performance.h" #include "compiler/nir/nir.h" struct bblock_t; @@ -38,8 +40,34 @@ namespace { struct acp_entry; } +class fs_visitor; + namespace brw { - class fs_live_variables; + /** + * Register pressure analysis of a shader. Estimates how many registers + * are live at any point of the program in GRF units. + */ + struct register_pressure { + register_pressure(const fs_visitor *v); + ~register_pressure(); + + analysis_dependency_class + dependency_class() const + { + return (DEPENDENCY_INSTRUCTION_IDENTITY | + DEPENDENCY_INSTRUCTION_DATA_FLOW | + DEPENDENCY_VARIABLES); + } + + bool + validate(const fs_visitor *) const + { + /* FINISHME */ + return true; + } + + unsigned *regs_live_at_ip; + }; } struct brw_gs_compile; @@ -97,9 +125,9 @@ public: bool run_tcs(); bool run_tes(); bool run_gs(); - bool run_cs(unsigned min_dispatch_width); + bool run_cs(bool allow_spilling); void optimize(); - void allocate_registers(unsigned min_dispatch_width, bool allow_spilling); + void allocate_registers(bool allow_spilling); void setup_fs_payload_gen4(); void setup_fs_payload_gen6(); void setup_vs_payload(); @@ -107,6 +135,7 @@ public: void setup_cs_payload(); bool fixup_sends_duplicate_payload(); void fixup_3src_null_dest(); + bool fixup_nomask_control_flow(); void assign_curb_setup(); void assign_urb_setup(); void convert_attr_sources_to_hw_regs(fs_inst *inst); @@ -117,21 +146,20 @@ public: bool assign_regs(bool allow_spilling, bool spill_all); void assign_regs_trivial(); void calculate_payload_ranges(int payload_node_count, - int *payload_last_use_ip); + int *payload_last_use_ip) const; void split_virtual_grfs(); bool compact_virtual_grfs(); void assign_constant_locations(); bool get_pull_locs(const fs_reg &src, unsigned *out_surf_index, unsigned *out_pull_index); void lower_constant_loads(); - void invalidate_live_intervals(); - void calculate_live_intervals(); - void calculate_register_pressure(); + virtual void invalidate_analysis(brw::analysis_dependency_class c); void validate(); bool opt_algebraic(); bool opt_redundant_discard_jumps(); bool opt_cse(); - bool opt_cse_local(bblock_t *block); + bool opt_cse_local(const brw::fs_live_variables &live, bblock_t *block, int &ip); + bool opt_copy_propagation(); bool try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry); bool try_constant_propagate(fs_inst *inst, acp_entry *entry); @@ -140,7 +168,6 @@ public: bool opt_drop_redundant_mov_to_flags(); bool opt_register_renaming(); bool opt_bank_conflicts(); - unsigned bank_conflict_cycles(const fs_inst *inst) const; bool register_coalesce(); bool compute_to_mrf(); bool eliminate_find_live_channel(); @@ -148,8 +175,6 @@ public: bool remove_duplicate_mrf_writes(); bool remove_extra_rounding_modes(); - bool opt_sampler_eot(); - bool virtual_grf_interferes(int a, int b); void schedule_instructions(instruction_scheduler_mode mode); void insert_gen4_send_dependency_workarounds(); void insert_gen4_pre_send_dependency_workarounds(bblock_t *block, @@ -167,7 +192,9 @@ public: bool lower_integer_multiplication(); bool lower_minmax(); bool lower_simd_width(); + bool lower_barycentrics(); bool lower_scoreboard(); + bool lower_sub_sat(); bool opt_combine_constants(); void emit_dummy_fs(); @@ -302,12 +329,10 @@ public: fs_reg interp_reg(int location, int channel); - int implied_mrf_writes(const fs_inst *inst) const; - - virtual void dump_instructions(); - virtual void dump_instructions(const char *name); - void dump_instruction(backend_instruction *inst); - void dump_instruction(backend_instruction *inst, FILE *file); + virtual void dump_instructions() const; + virtual void dump_instructions(const char *name) const; + void dump_instruction(const backend_instruction *inst) const; + void dump_instruction(const backend_instruction *inst, FILE *file) const; const brw_base_prog_key *const key; const struct brw_sampler_prog_key_data *key_tex; @@ -318,11 +343,14 @@ public: const struct brw_vue_map *input_vue_map; - int *virtual_grf_start; - int *virtual_grf_end; - brw::fs_live_variables *live_intervals; + int *param_size; - int *regs_live_at_ip; + BRW_ANALYSIS(live_analysis, brw::fs_live_variables, + backend_shader *) live_analysis; + BRW_ANALYSIS(regpressure_analysis, brw::register_pressure, + fs_visitor *) regpressure_analysis; + BRW_ANALYSIS(performance_analysis, brw::performance, + fs_visitor *) performance_analysis; /** Number of uniform variable components visited. */ unsigned uniforms; @@ -343,6 +371,7 @@ public: int *push_constant_loc; fs_reg subgroup_id; + fs_reg group_size[3]; fs_reg scratch_base; fs_reg frag_depth; fs_reg frag_stencil; @@ -412,8 +441,23 @@ private: void lower_mul_dword_inst(fs_inst *inst, bblock_t *block); void lower_mul_qword_inst(fs_inst *inst, bblock_t *block); void lower_mulh_inst(fs_inst *inst, bblock_t *block); + + unsigned workgroup_size() const; }; +/** + * Return the flag register used in fragment shaders to keep track of live + * samples. On Gen7+ we use f1.0-f1.1 to allow discard jumps in SIMD32 + * dispatch mode, while earlier generations are constrained to f0.1, which + * limits the dispatch width to SIMD16 for fragment shaders that use discard. + */ +static inline unsigned +sample_mask_flag_subreg(const fs_visitor *shader) +{ + assert(shader->stage == MESA_SHADER_FRAGMENT); + return shader->devinfo->gen >= 7 ? 2 : 1; +} + /** * The fragment shader code generator. * @@ -425,14 +469,16 @@ public: fs_generator(const struct brw_compiler *compiler, void *log_data, void *mem_ctx, struct brw_stage_prog_data *prog_data, - struct shader_stats shader_stats, bool runtime_check_aads_emit, gl_shader_stage stage); ~fs_generator(); void enable_debug(const char *shader_name); int generate_code(const cfg_t *cfg, int dispatch_width, + struct shader_stats shader_stats, + const brw::performance &perf, struct brw_compile_stats *stats); + void add_const_data(void *data, unsigned size); const unsigned *get_assembly(); private: @@ -530,7 +576,6 @@ private: unsigned dispatch_width; /**< 8, 16 or 32 */ exec_list discard_halt_patches; - struct shader_stats shader_stats; bool runtime_check_aads_emit; bool debug_flag; const char *shader_name; @@ -541,25 +586,21 @@ private: namespace brw { inline fs_reg fetch_payload_reg(const brw::fs_builder &bld, uint8_t regs[2], - brw_reg_type type = BRW_REGISTER_TYPE_F, unsigned n = 1) + brw_reg_type type = BRW_REGISTER_TYPE_F) { if (!regs[0]) return fs_reg(); if (bld.dispatch_width() > 16) { - const fs_reg tmp = bld.vgrf(type, n); + const fs_reg tmp = bld.vgrf(type); const brw::fs_builder hbld = bld.exec_all().group(16, 0); const unsigned m = bld.dispatch_width() / hbld.dispatch_width(); - fs_reg *const components = new fs_reg[n * m]; + fs_reg *const components = new fs_reg[m]; - for (unsigned c = 0; c < n; c++) { - for (unsigned g = 0; g < m; g++) { - components[c * m + g] = - offset(retype(brw_vec8_grf(regs[g], 0), type), hbld, c); - } - } + for (unsigned g = 0; g < m; g++) + components[g] = retype(brw_vec8_grf(regs[g], 0), type); - hbld.LOAD_PAYLOAD(tmp, components, n * m, 0); + hbld.LOAD_PAYLOAD(tmp, components, m, 0); delete[] components; return tmp; @@ -569,6 +610,29 @@ namespace brw { } } + inline fs_reg + fetch_barycentric_reg(const brw::fs_builder &bld, uint8_t regs[2]) + { + if (!regs[0]) + return fs_reg(); + + const fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_F, 2); + const brw::fs_builder hbld = bld.exec_all().group(8, 0); + const unsigned m = bld.dispatch_width() / hbld.dispatch_width(); + fs_reg *const components = new fs_reg[2 * m]; + + for (unsigned c = 0; c < 2; c++) { + for (unsigned g = 0; g < m; g++) + components[c * m + g] = offset(brw_vec8_grf(regs[g / 2], 0), + hbld, c + 2 * (g % 2)); + } + + hbld.LOAD_PAYLOAD(tmp, components, 2 * m, 0); + + delete[] components; + return tmp; + } + bool lower_src_modifiers(fs_visitor *v, bblock_t *block, fs_inst *inst, unsigned i); } @@ -594,5 +658,6 @@ enum brw_barycentric_mode brw_barycentric_mode(enum glsl_interp_mode mode, uint32_t brw_fb_write_msg_control(const fs_inst *inst, const struct brw_wm_prog_data *prog_data); +void brw_compute_urb_setup_index(struct brw_wm_prog_data *wm_prog_data); #endif /* BRW_FS_H */