X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fintel%2Fcompiler%2Fbrw_fs.h;h=c09c4eb87590906919fe36942ee0a45a5f26047d;hb=62795475e8f45f92bb8f467d9e2318fdfdba6297;hp=779170ecc952d2e9031cb66b1c95c9027a8905bd;hpb=22c654941b576785d2e009bf64aa20fea758de58;p=mesa.git diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h index 779170ecc95..c09c4eb8759 100644 --- a/src/intel/compiler/brw_fs.h +++ b/src/intel/compiler/brw_fs.h @@ -31,6 +31,7 @@ #include "brw_shader.h" #include "brw_ir_fs.h" #include "brw_fs_builder.h" +#include "brw_fs_live_variables.h" #include "compiler/nir/nir.h" struct bblock_t; @@ -38,8 +39,34 @@ namespace { struct acp_entry; } +class fs_visitor; + namespace brw { - class fs_live_variables; + /** + * Register pressure analysis of a shader. Estimates how many registers + * are live at any point of the program in GRF units. + */ + struct register_pressure { + register_pressure(const fs_visitor *v); + ~register_pressure(); + + analysis_dependency_class + dependency_class() const + { + return (DEPENDENCY_INSTRUCTION_IDENTITY | + DEPENDENCY_INSTRUCTION_DATA_FLOW | + DEPENDENCY_VARIABLES); + } + + bool + validate(const fs_visitor *) const + { + /* FINISHME */ + return true; + } + + unsigned *regs_live_at_ip; + }; } struct brw_gs_compile; @@ -52,6 +79,11 @@ offset(const fs_reg ®, const brw::fs_builder &bld, unsigned delta) #define UBO_START ((1 << 16) - 4) +struct shader_stats { + const char *scheduler_mode; + unsigned promoted_constants; +}; + /** * The fragment shader front-end. * @@ -62,9 +94,8 @@ class fs_visitor : public backend_shader public: fs_visitor(const struct brw_compiler *compiler, void *log_data, void *mem_ctx, - const void *key, + const brw_base_prog_key *key, struct brw_stage_prog_data *prog_data, - struct gl_program *prog, const nir_shader *shader, unsigned dispatch_width, int shader_time_index, @@ -80,12 +111,6 @@ public: fs_reg vgrf(const glsl_type *const type); void import_uniforms(fs_visitor *v); - void setup_uniform_clipplane_values(); - void compute_clip_distance(); - - fs_inst *get_instruction_generating_reg(fs_inst *start, - fs_inst *end, - const fs_reg ®); void VARYING_PULL_CONSTANT_LOAD(const brw::fs_builder &bld, const fs_reg &dst, @@ -96,7 +121,7 @@ public: bool run_fs(bool allow_spilling, bool do_rep_send); bool run_vs(); - bool run_tcs_single_patch(); + bool run_tcs(); bool run_tes(); bool run_gs(); bool run_cs(unsigned min_dispatch_width); @@ -107,37 +132,33 @@ public: void setup_vs_payload(); void setup_gs_payload(); void setup_cs_payload(); + bool fixup_sends_duplicate_payload(); void fixup_3src_null_dest(); + bool fixup_nomask_control_flow(); void assign_curb_setup(); - void calculate_urb_setup(); void assign_urb_setup(); void convert_attr_sources_to_hw_regs(fs_inst *inst); void assign_vs_urb_setup(); - void assign_tcs_single_patch_urb_setup(); + void assign_tcs_urb_setup(); void assign_tes_urb_setup(); void assign_gs_urb_setup(); bool assign_regs(bool allow_spilling, bool spill_all); void assign_regs_trivial(); void calculate_payload_ranges(int payload_node_count, - int *payload_last_use_ip); - void setup_payload_interference(struct ra_graph *g, int payload_reg_count, - int first_payload_node); - int choose_spill_reg(struct ra_graph *g); - void spill_reg(int spill_reg); + int *payload_last_use_ip) const; void split_virtual_grfs(); bool compact_virtual_grfs(); void assign_constant_locations(); bool get_pull_locs(const fs_reg &src, unsigned *out_surf_index, unsigned *out_pull_index); void lower_constant_loads(); - void invalidate_live_intervals(); - void calculate_live_intervals(); - void calculate_register_pressure(); + virtual void invalidate_analysis(brw::analysis_dependency_class c); void validate(); bool opt_algebraic(); bool opt_redundant_discard_jumps(); bool opt_cse(); - bool opt_cse_local(bblock_t *block); + bool opt_cse_local(const brw::fs_live_variables &live, bblock_t *block, int &ip); + bool opt_copy_propagation(); bool try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry); bool try_constant_propagate(fs_inst *inst, acp_entry *entry); @@ -155,7 +176,6 @@ public: bool remove_extra_rounding_modes(); bool opt_sampler_eot(); - bool virtual_grf_interferes(int a, int b); void schedule_instructions(instruction_scheduler_mode mode); void insert_gen4_send_dependency_workarounds(); void insert_gen4_pre_send_dependency_workarounds(bblock_t *block, @@ -168,11 +188,14 @@ public: void lower_uniform_pull_constant_loads(); bool lower_load_payload(); bool lower_pack(); - bool lower_conversions(); + bool lower_regioning(); bool lower_logical_sends(); bool lower_integer_multiplication(); bool lower_minmax(); bool lower_simd_width(); + bool lower_barycentrics(); + bool lower_scoreboard(); + bool lower_sub_sat(); bool opt_combine_constants(); void emit_dummy_fs(); @@ -186,17 +209,22 @@ public: void emit_interpolation_setup_gen6(); void compute_sample_position(fs_reg dst, fs_reg int_sample_pos); fs_reg emit_mcs_fetch(const fs_reg &coordinate, unsigned components, - const fs_reg &sampler); + const fs_reg &texture, + const fs_reg &texture_handle); void emit_gen6_gather_wa(uint8_t wa, fs_reg dst); fs_reg resolve_source_modifiers(const fs_reg &src); void emit_discard_jump(); + void emit_fsign(const class brw::fs_builder &, const nir_alu_instr *instr, + fs_reg result, fs_reg *op, unsigned fsign_src); + void emit_shader_float_controls_execution_mode(); bool opt_peephole_sel(); - bool opt_peephole_csel(); bool opt_peephole_predicated_break(); bool opt_saturate_propagation(); bool opt_cmod_propagation(); bool opt_zero_samples(); + void set_tcs_invocation_id(); + void emit_nir_code(); void nir_setup_outputs(); void nir_setup_uniforms(); @@ -207,7 +235,10 @@ public: void nir_emit_loop(nir_loop *loop); void nir_emit_block(nir_block *block); void nir_emit_instr(nir_instr *instr); - void nir_emit_alu(const brw::fs_builder &bld, nir_alu_instr *instr); + void nir_emit_alu(const brw::fs_builder &bld, nir_alu_instr *instr, + bool need_dest); + bool try_emit_b2fi_of_inot(const brw::fs_builder &bld, fs_reg result, + nir_alu_instr *instr); void nir_emit_load_const(const brw::fs_builder &bld, nir_load_const_instr *instr); void nir_emit_vs_intrinsic(const brw::fs_builder &bld, @@ -220,14 +251,29 @@ public: nir_intrinsic_instr *instr); void nir_emit_cs_intrinsic(const brw::fs_builder &bld, nir_intrinsic_instr *instr); + fs_reg get_nir_image_intrinsic_image(const brw::fs_builder &bld, + nir_intrinsic_instr *instr); + fs_reg get_nir_ssbo_intrinsic_index(const brw::fs_builder &bld, + nir_intrinsic_instr *instr); + fs_reg swizzle_nir_scratch_addr(const brw::fs_builder &bld, + const fs_reg &addr, + bool in_dwords); void nir_emit_intrinsic(const brw::fs_builder &bld, nir_intrinsic_instr *instr); void nir_emit_tes_intrinsic(const brw::fs_builder &bld, nir_intrinsic_instr *instr); void nir_emit_ssbo_atomic(const brw::fs_builder &bld, int op, nir_intrinsic_instr *instr); + void nir_emit_ssbo_atomic_float(const brw::fs_builder &bld, + int op, nir_intrinsic_instr *instr); void nir_emit_shared_atomic(const brw::fs_builder &bld, int op, nir_intrinsic_instr *instr); + void nir_emit_shared_atomic_float(const brw::fs_builder &bld, + int op, nir_intrinsic_instr *instr); + void nir_emit_global_atomic(const brw::fs_builder &bld, + int op, nir_intrinsic_instr *instr); + void nir_emit_global_atomic_float(const brw::fs_builder &bld, + int op, nir_intrinsic_instr *instr); void nir_emit_texture(const brw::fs_builder &bld, nir_tex_instr *instr); void nir_emit_jump(const brw::fs_builder &bld, @@ -235,8 +281,13 @@ public: fs_reg get_nir_src(const nir_src &src); fs_reg get_nir_src_imm(const nir_src &src); fs_reg get_nir_dest(const nir_dest &dest); - fs_reg get_nir_image_deref(const nir_deref_var *deref); fs_reg get_indirect_offset(nir_intrinsic_instr *instr); + fs_reg get_tcs_single_patch_icp_handle(const brw::fs_builder &bld, + nir_intrinsic_instr *instr); + fs_reg get_tcs_eight_patch_icp_handle(const brw::fs_builder &bld, + nir_intrinsic_instr *instr); + struct brw_reg get_tcs_output_urb_handle(); + void emit_percomp(const brw::fs_builder &bld, const fs_inst &inst, unsigned wr_mask); @@ -249,6 +300,7 @@ public: fs_inst *emit_single_fb_write(const brw::fs_builder &bld, fs_reg color1, fs_reg color2, fs_reg src0_alpha, unsigned components); + void emit_alpha_to_coverage_workaround(const fs_reg &src0_alpha); void emit_fb_writes(); fs_inst *emit_non_coherent_fb_read(const brw::fs_builder &bld, const fs_reg &dst, unsigned target); @@ -278,28 +330,26 @@ public: fs_reg interp_reg(int location, int channel); - int implied_mrf_writes(fs_inst *inst) const; - - virtual void dump_instructions(); - virtual void dump_instructions(const char *name); - void dump_instruction(backend_instruction *inst); - void dump_instruction(backend_instruction *inst, FILE *file); + virtual void dump_instructions() const; + virtual void dump_instructions(const char *name) const; + void dump_instruction(const backend_instruction *inst) const; + void dump_instruction(const backend_instruction *inst, FILE *file) const; - const void *const key; + const brw_base_prog_key *const key; const struct brw_sampler_prog_key_data *key_tex; struct brw_gs_compile *gs_compile; struct brw_stage_prog_data *prog_data; - struct gl_program *prog; const struct brw_vue_map *input_vue_map; - int *virtual_grf_start; - int *virtual_grf_end; - brw::fs_live_variables *live_intervals; + int *param_size; - int *regs_live_at_ip; + BRW_ANALYSIS(live_analysis, brw::fs_live_variables, + backend_shader *) live_analysis; + BRW_ANALYSIS(regpressure_analysis, brw::register_pressure, + fs_visitor *) regpressure_analysis; /** Number of uniform variable components visited. */ unsigned uniforms; @@ -320,6 +370,7 @@ public: int *push_constant_loc; fs_reg subgroup_id; + fs_reg scratch_base; fs_reg frag_depth; fs_reg frag_stencil; fs_reg sample_mask; @@ -338,14 +389,15 @@ public: /** Register numbers for thread payload fields. */ struct thread_payload { - uint8_t source_depth_reg; - uint8_t source_w_reg; - uint8_t aa_dest_stencil_reg; - uint8_t dest_depth_reg; - uint8_t sample_pos_reg; - uint8_t sample_mask_in_reg; - uint8_t barycentric_coord_reg[BRW_BARYCENTRIC_MODE_COUNT]; - uint8_t local_invocation_id_reg; + uint8_t subspan_coord_reg[2]; + uint8_t source_depth_reg[2]; + uint8_t source_w_reg[2]; + uint8_t aa_dest_stencil_reg[2]; + uint8_t dest_depth_reg[2]; + uint8_t sample_pos_reg[2]; + uint8_t sample_mask_in_reg[2]; + uint8_t barycentric_coord_reg[BRW_BARYCENTRIC_MODE_COUNT][2]; + uint8_t local_invocation_id_reg[2]; /** The number of thread payload registers the hardware will supply. */ uint8_t num_regs; @@ -360,7 +412,6 @@ public: fs_reg pixel_w; fs_reg delta_xy[BRW_BARYCENTRIC_MODE_COUNT]; fs_reg shader_start_time; - fs_reg userplane[MAX_CLIP_PLANES]; fs_reg final_gs_vertex_count; fs_reg control_data_bits; fs_reg invocation_id; @@ -373,10 +424,38 @@ public: int shader_time_index; - unsigned promoted_constants; + struct shader_stats shader_stats; + brw::fs_builder bld; + +private: + fs_reg prepare_alu_destination_and_sources(const brw::fs_builder &bld, + nir_alu_instr *instr, + fs_reg *op, + bool need_dest); + + void resolve_inot_sources(const brw::fs_builder &bld, nir_alu_instr *instr, + fs_reg *op); + void lower_mul_dword_inst(fs_inst *inst, bblock_t *block); + void lower_mul_qword_inst(fs_inst *inst, bblock_t *block); + void lower_mulh_inst(fs_inst *inst, bblock_t *block); + + unsigned workgroup_size() const; }; +/** + * Return the flag register used in fragment shaders to keep track of live + * samples. On Gen7+ we use f1.0-f1.1 to allow discard jumps in SIMD32 + * dispatch mode, while earlier generations are constrained to f0.1, which + * limits the dispatch width to SIMD16 for fragment shaders that use discard. + */ +static inline unsigned +sample_mask_flag_subreg(const fs_visitor *shader) +{ + assert(shader->stage == MESA_SHADER_FRAGMENT); + return shader->devinfo->gen >= 7 ? 2 : 1; +} + /** * The fragment shader code generator. * @@ -387,15 +466,15 @@ class fs_generator public: fs_generator(const struct brw_compiler *compiler, void *log_data, void *mem_ctx, - const void *key, struct brw_stage_prog_data *prog_data, - unsigned promoted_constants, bool runtime_check_aads_emit, gl_shader_stage stage); ~fs_generator(); void enable_debug(const char *shader_name); - int generate_code(const cfg_t *cfg, int dispatch_width); + int generate_code(const cfg_t *cfg, int dispatch_width, + struct shader_stats shader_stats, + struct brw_compile_stats *stats); const unsigned *get_assembly(); private: @@ -403,6 +482,12 @@ private: struct brw_reg payload, struct brw_reg implied_header, GLuint nr); + void generate_send(fs_inst *inst, + struct brw_reg dst, + struct brw_reg desc, + struct brw_reg ex_desc, + struct brw_reg payload, + struct brw_reg payload2); void generate_fb_write(fs_inst *inst, struct brw_reg payload); void generate_fb_read(fs_inst *inst, struct brw_reg dst, struct brw_reg payload); @@ -412,7 +497,7 @@ private: void generate_barrier(fs_inst *inst, struct brw_reg src); bool generate_linterp(fs_inst *inst, struct brw_reg dst, struct brw_reg *src); - void generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src, + void generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg surface_index, struct brw_reg sampler_index); void generate_get_buffer_size(fs_inst *inst, struct brw_reg dst, @@ -435,10 +520,6 @@ private: void generate_varying_pull_constant_load_gen4(fs_inst *inst, struct brw_reg dst, struct brw_reg index); - void generate_varying_pull_constant_load_gen7(fs_inst *inst, - struct brw_reg dst, - struct brw_reg index, - struct brw_reg offset); void generate_mov_dispatch_to_flags(fs_inst *inst); void generate_pixel_interpolator_query(fs_inst *inst, @@ -458,9 +539,6 @@ private: struct brw_reg dst, struct brw_reg x, struct brw_reg y); - void generate_unpack_half_2x16_split(fs_inst *inst, - struct brw_reg dst, - struct brw_reg src); void generate_shader_time_add(fs_inst *inst, struct brw_reg payload, @@ -477,6 +555,10 @@ private: struct brw_reg src, struct brw_reg idx); + void generate_quad_swizzle(const fs_inst *inst, + struct brw_reg dst, struct brw_reg src, + unsigned swiz); + bool patch_discard_jumps_to_fb_writes(); const struct brw_compiler *compiler; @@ -485,13 +567,11 @@ private: const struct gen_device_info *devinfo; struct brw_codegen *p; - const void * const key; struct brw_stage_prog_data * const prog_data; unsigned dispatch_width; /**< 8, 16 or 32 */ exec_list discard_halt_patches; - unsigned promoted_constants; bool runtime_check_aads_emit; bool debug_flag; const char *shader_name; @@ -499,25 +579,59 @@ private: void *mem_ctx; }; -void shuffle_32bit_load_result_to_64bit_data(const brw::fs_builder &bld, - const fs_reg &dst, - const fs_reg &src, - uint32_t components); - -fs_reg shuffle_64bit_data_for_32bit_write(const brw::fs_builder &bld, - const fs_reg &src, - uint32_t components); - -void shuffle_32bit_load_result_to_16bit_data(const brw::fs_builder &bld, - const fs_reg &dst, - const fs_reg &src, - uint32_t first_component, - uint32_t components); - -void shuffle_16bit_data_for_32bit_write(const brw::fs_builder &bld, - const fs_reg &dst, - const fs_reg &src, - uint32_t components); +namespace brw { + inline fs_reg + fetch_payload_reg(const brw::fs_builder &bld, uint8_t regs[2], + brw_reg_type type = BRW_REGISTER_TYPE_F) + { + if (!regs[0]) + return fs_reg(); + + if (bld.dispatch_width() > 16) { + const fs_reg tmp = bld.vgrf(type); + const brw::fs_builder hbld = bld.exec_all().group(16, 0); + const unsigned m = bld.dispatch_width() / hbld.dispatch_width(); + fs_reg *const components = new fs_reg[m]; + + for (unsigned g = 0; g < m; g++) + components[g] = retype(brw_vec8_grf(regs[g], 0), type); + + hbld.LOAD_PAYLOAD(tmp, components, m, 0); + + delete[] components; + return tmp; + + } else { + return fs_reg(retype(brw_vec8_grf(regs[0], 0), type)); + } + } + + inline fs_reg + fetch_barycentric_reg(const brw::fs_builder &bld, uint8_t regs[2]) + { + if (!regs[0]) + return fs_reg(); + + const fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_F, 2); + const brw::fs_builder hbld = bld.exec_all().group(8, 0); + const unsigned m = bld.dispatch_width() / hbld.dispatch_width(); + fs_reg *const components = new fs_reg[2 * m]; + + for (unsigned c = 0; c < 2; c++) { + for (unsigned g = 0; g < m; g++) + components[c * m + g] = offset(brw_vec8_grf(regs[g / 2], 0), + hbld, c + 2 * (g % 2)); + } + + hbld.LOAD_PAYLOAD(tmp, components, 2 * m, 0); + + delete[] components; + return tmp; + } + + bool + lower_src_modifiers(fs_visitor *v, bblock_t *block, fs_inst *inst, unsigned i); +} void shuffle_from_32bit_read(const brw::fs_builder &bld, const fs_reg &dst, @@ -525,15 +639,21 @@ void shuffle_from_32bit_read(const brw::fs_builder &bld, uint32_t first_component, uint32_t components); -fs_reg shuffle_for_32bit_write(const brw::fs_builder &bld, - const fs_reg &src, - uint32_t first_component, - uint32_t components); - fs_reg setup_imm_df(const brw::fs_builder &bld, double v); +fs_reg setup_imm_b(const brw::fs_builder &bld, + int8_t v); + +fs_reg setup_imm_ub(const brw::fs_builder &bld, + uint8_t v); + enum brw_barycentric_mode brw_barycentric_mode(enum glsl_interp_mode mode, nir_intrinsic_op op); +uint32_t brw_fb_write_msg_control(const fs_inst *inst, + const struct brw_wm_prog_data *prog_data); + +void brw_compute_urb_setup_index(struct brw_wm_prog_data *wm_prog_data); + #endif /* BRW_FS_H */