X-Git-Url: https://git.libre-soc.org/?p=mesa.git;a=blobdiff_plain;f=src%2Fbroadcom%2Fcompiler%2Fv3d_compiler.h;h=2bc07de7e1f3ed31ee52e1b149e0ebbbaef43423;hp=f3e3875c87c9fbc711eec383b09cc05f19b90d54;hb=dc402be73e4bbb9b43213a4266b98a46092e3038;hpb=beeb94402f9d33081147c88de2b9d4c4ea24e842 diff --git a/src/broadcom/compiler/v3d_compiler.h b/src/broadcom/compiler/v3d_compiler.h index f3e3875c87c..2bc07de7e1f 100644 --- a/src/broadcom/compiler/v3d_compiler.h +++ b/src/broadcom/compiler/v3d_compiler.h @@ -34,6 +34,7 @@ #include "util/macros.h" #include "common/v3d_debug.h" #include "common/v3d_device_info.h" +#include "common/v3d_limits.h" #include "compiler/nir/nir.h" #include "util/list.h" #include "util/u_math.h" @@ -41,11 +42,6 @@ #include "qpu/qpu_instr.h" #include "pipe/p_state.h" -#define V3D_MAX_TEXTURE_SAMPLERS 32 -#define V3D_MAX_SAMPLES 4 -#define V3D_MAX_FS_INPUTS 64 -#define V3D_MAX_VS_INPUTS 64 - struct nir_builder; struct v3d_fs_inputs { @@ -73,9 +69,6 @@ enum qfile { * or physical registers later. */ QFILE_TEMP, - QFILE_UNIF, - QFILE_TLB, - QFILE_TLBU, /** * VPM reads use this with an index value to say what part of the VPM @@ -109,12 +102,23 @@ static inline struct qreg vir_reg(enum qfile file, uint32_t index) return (struct qreg){file, index}; } +static inline struct qreg vir_magic_reg(uint32_t index) +{ + return (struct qreg){QFILE_MAGIC, index}; +} + +static inline struct qreg vir_nop_reg(void) +{ + return (struct qreg){QFILE_NULL, 0}; +} + /** * A reference to an actual register at the QPU level, for register * allocation. */ struct qpu_reg { bool magic; + bool smimm; int index; }; @@ -132,12 +136,11 @@ struct qinst { /* Pre-register-allocation references to src/dst registers */ struct qreg dst; struct qreg src[3]; - bool cond_is_exec_mask; - bool has_implicit_uniform; bool is_last_thrsw; - /* After vir_to_qpu.c: If instr reads a uniform, which uniform from - * the uncompiled stream it is. + /* If the instruction reads a uniform (other than through src[i].file + * == QFILE_UNIF), that uniform's index in c->uniform_contents. ~0 + * otherwise. */ int uniform; }; @@ -222,13 +225,15 @@ enum quniform_contents { */ QUNIFORM_TEXTURE_CONFIG_P1, - /* A a V3D 4.x texture config parameter. The high 8 bits will be + /* A V3D 4.x texture config parameter. The high 8 bits will be * which texture or sampler is being sampled, and the driver must * replace the address field with the appropriate address. */ QUNIFORM_TMU_CONFIG_P0, QUNIFORM_TMU_CONFIG_P1, + QUNIFORM_IMAGE_TMU_CONFIG_P0, + QUNIFORM_TEXTURE_FIRST_LEVEL, QUNIFORM_TEXTURE_WIDTH, @@ -242,18 +247,56 @@ enum quniform_contents { QUNIFORM_TEXRECT_SCALE_X, QUNIFORM_TEXRECT_SCALE_Y, - QUNIFORM_TEXTURE_BORDER_COLOR, + /* Returns the base offset of the SSBO given by the data value. */ + QUNIFORM_SSBO_OFFSET, + + /* Returns the size of the SSBO given by the data value. */ + QUNIFORM_GET_BUFFER_SIZE, + + /* Sizes (in pixels) of a shader image given by the data value. */ + QUNIFORM_IMAGE_WIDTH, + QUNIFORM_IMAGE_HEIGHT, + QUNIFORM_IMAGE_DEPTH, + QUNIFORM_IMAGE_ARRAY_SIZE, QUNIFORM_ALPHA_REF, - QUNIFORM_SAMPLE_MASK, + + /* Number of workgroups passed to glDispatchCompute in the dimension + * selected by the data value. + */ + QUNIFORM_NUM_WORK_GROUPS, /** * Returns the the offset of the scratch buffer for register spilling. */ QUNIFORM_SPILL_OFFSET, QUNIFORM_SPILL_SIZE_PER_THREAD, + + /** + * Returns the offset of the shared memory for compute shaders. + * + * This will be accessed using TMU general memory operations, so the + * L2T cache will effectively be the shared memory area. + */ + QUNIFORM_SHARED_OFFSET, }; +static inline uint32_t v3d_unit_data_create(uint32_t unit, uint32_t value) +{ + assert(value < (1 << 24)); + return unit << 24 | value; +} + +static inline uint32_t v3d_unit_data_get_unit(uint32_t data) +{ + return data >> 24; +} + +static inline uint32_t v3d_unit_data_get_offset(uint32_t data) +{ + return data & 0xffffff; +} + struct v3d_varying_slot { uint8_t slot_and_component; }; @@ -275,33 +318,12 @@ static inline uint8_t v3d_slot_get_component(struct v3d_varying_slot slot) return slot.slot_and_component & 3; } -struct v3d_ubo_range { - /** - * offset in bytes from the start of the ubo where this range is - * uploaded. - * - * Only set once used is set. - */ - uint32_t dst_offset; - - /** - * offset in bytes from the start of the gallium uniforms where the - * data comes from. - */ - uint32_t src_offset; - - /** size in bytes of this ubo range */ - uint32_t size; -}; - struct v3d_key { void *shader_state; struct { uint8_t swizzle[4]; uint8_t return_size; uint8_t return_channels; - unsigned compare_mode:1; - unsigned compare_func:3; bool clamp_s:1; bool clamp_t:1; bool clamp_r:1; @@ -323,7 +345,8 @@ struct v3d_fs_key { bool sample_alpha_to_one; bool clamp_color; bool shade_model_flat; - uint8_t nr_cbufs; + /* Mask of which color render targets are present. */ + uint8_t cbufs; uint8_t swap_color_rb; /* Mask of which render targets need to be written as 32-bit floats */ uint8_t f32_color_rb; @@ -379,6 +402,8 @@ struct qblock { /** @{ used by v3d_vir_live_variables.c */ BITSET_WORD *def; + BITSET_WORD *defin; + BITSET_WORD *defout; BITSET_WORD *use; BITSET_WORD *live_in; BITSET_WORD *live_out; @@ -435,6 +460,8 @@ vir_after_block(struct qblock *block) struct v3d_compiler { const struct v3d_device_info *devinfo; struct ra_regs *regs; + unsigned int reg_class_any[3]; + unsigned int reg_class_r5[3]; unsigned int reg_class_phys[3]; unsigned int reg_class_phys_or_acc[3]; }; @@ -446,6 +473,10 @@ struct v3d_compile { struct exec_list *cf_node_list; const struct v3d_compiler *compiler; + void (*debug_output)(const char *msg, + void *debug_output_data); + void *debug_output_data; + /** * Mapping from nir_register * or nir_ssa_def * to array of struct * qreg for the values. @@ -481,18 +512,13 @@ struct v3d_compile { uint32_t centroid_flags[BITSET_WORDS(V3D_MAX_FS_INPUTS)]; bool uses_center_w; - - struct v3d_ubo_range *ubo_ranges; - bool *ubo_range_used; - uint32_t ubo_ranges_array_size; - /** Number of uniform areas tracked in ubo_ranges. */ - uint32_t num_ubo_ranges; - uint32_t next_ubo_dst_offset; + bool writes_z; /* State for whether we're executing on each channel currently. 0 if * yes, otherwise a block number + 1 that the channel jumped to. */ struct qreg execute; + bool in_control_flow; struct qreg line_x, point_x, point_y; @@ -511,15 +537,19 @@ struct v3d_compile { /* Fragment shader payload regs. */ struct qreg payload_w, payload_w_centroid, payload_z; - uint8_t vattr_sizes[V3D_MAX_VS_INPUTS]; - uint32_t num_vpm_writes; + struct qreg cs_payload[2]; + struct qreg cs_shared_offset; + int local_invocation_index_bits; + + uint8_t vattr_sizes[V3D_MAX_VS_INPUTS / 4]; + uint32_t vpm_output_size; /* Size in bytes of registers that have been spilled. This is how much * space needs to be available in the spill BO per thread per QPU. */ uint32_t spill_size; - /* Shader-db stats for register spilling. */ - uint32_t spills, fills; + /* Shader-db stats */ + uint32_t spills, fills, loops; /** * Register spilling's per-thread base address, shared between each * spill/fill's addressing calculations. @@ -556,10 +586,8 @@ struct v3d_compile { enum quniform_contents *uniform_contents; uint32_t uniform_array_size; uint32_t num_uniforms; - uint32_t num_outputs; uint32_t output_position_index; nir_variable *output_color_var[4]; - uint32_t output_point_size_index; uint32_t output_sample_mask_index; struct qreg undef; @@ -581,18 +609,6 @@ struct v3d_compile { */ uint32_t num_inputs; - /** - * Number of inputs from num_inputs remaining to be queued to the read - * FIFO in the VS/CS. - */ - uint32_t num_inputs_remaining; - - /* Number of inputs currently in the read FIFO for the VS/CS */ - uint32_t num_inputs_in_fifo; - - /** Next offset in the VPM to read from in the VS/CS */ - uint32_t vpm_read_offset; - uint32_t program_id; uint32_t variant_id; @@ -620,12 +636,8 @@ struct v3d_uniform_list { struct v3d_prog_data { struct v3d_uniform_list uniforms; - struct v3d_ubo_range *ubo_ranges; - uint32_t num_ubo_ranges; - uint32_t ubo_size; uint32_t spill_size; - uint8_t num_inputs; uint8_t threads; /* For threads > 1, whether the program should be dispatched in the @@ -640,13 +652,21 @@ struct v3d_vs_prog_data { bool uses_iid, uses_vid; /* Number of components read from each vertex attribute. */ - uint8_t vattr_sizes[32]; + uint8_t vattr_sizes[V3D_MAX_VS_INPUTS / 4]; /* Total number of components read, for the shader state record. */ uint32_t vpm_input_size; /* Total number of components written, for the shader state record. */ uint32_t vpm_output_size; + + /* Set if there should be separate VPM segments for input and output. + * If unset, vpm_input_size will be 0. + */ + bool separate_segments; + + /* Value to be programmed in VCM_CACHE_SIZE. */ + uint8_t vcm_cache_size; }; struct v3d_fs_prog_data { @@ -665,11 +685,24 @@ struct v3d_fs_prog_data { uint32_t centroid_flags[((V3D_MAX_FS_INPUTS - 1) / 24) + 1]; + uint8_t num_inputs; bool writes_z; - bool discard; + bool disable_ez; bool uses_center_w; }; +struct v3d_compute_prog_data { + struct v3d_prog_data base; + /* Size in bytes of the workgroup's shared space. */ + uint32_t shared_size; +}; + +static inline bool +vir_has_uniform(struct qinst *inst) +{ + return inst->uniform != ~0; +} + /* Special nir_load_input intrinsic index for loading the current TLB * destination color. */ @@ -683,19 +716,15 @@ const struct v3d_compiler *v3d_compiler_init(const struct v3d_device_info *devin void v3d_compiler_free(const struct v3d_compiler *compiler); void v3d_optimize_nir(struct nir_shader *s); -uint64_t *v3d_compile_vs(const struct v3d_compiler *compiler, - struct v3d_vs_key *key, - struct v3d_vs_prog_data *prog_data, - nir_shader *s, - int program_id, int variant_id, - uint32_t *final_assembly_size); - -uint64_t *v3d_compile_fs(const struct v3d_compiler *compiler, - struct v3d_fs_key *key, - struct v3d_fs_prog_data *prog_data, - nir_shader *s, - int program_id, int variant_id, - uint32_t *final_assembly_size); +uint64_t *v3d_compile(const struct v3d_compiler *compiler, + struct v3d_key *key, + struct v3d_prog_data **prog_data, + nir_shader *s, + void (*debug_output)(const char *msg, + void *debug_output_data), + void *debug_output_data, + int program_id, int variant_id, + uint32_t *final_assembly_size); void v3d_nir_to_vir(struct v3d_compile *c); @@ -710,27 +739,30 @@ struct qinst *vir_add_inst(enum v3d_qpu_add_op op, struct qreg dst, struct qreg src0, struct qreg src1); struct qinst *vir_mul_inst(enum v3d_qpu_mul_op op, struct qreg dst, struct qreg src0, struct qreg src1); -struct qinst *vir_branch_inst(enum v3d_qpu_branch_cond cond, struct qreg src0); +struct qinst *vir_branch_inst(struct v3d_compile *c, + enum v3d_qpu_branch_cond cond); void vir_remove_instruction(struct v3d_compile *c, struct qinst *qinst); +uint32_t vir_get_uniform_index(struct v3d_compile *c, + enum quniform_contents contents, + uint32_t data); struct qreg vir_uniform(struct v3d_compile *c, enum quniform_contents contents, uint32_t data); void vir_schedule_instructions(struct v3d_compile *c); +void v3d_setup_spill_base(struct v3d_compile *c); struct v3d_qpu_instr v3d_qpu_nop(void); struct qreg vir_emit_def(struct v3d_compile *c, struct qinst *inst); struct qinst *vir_emit_nondef(struct v3d_compile *c, struct qinst *inst); void vir_set_cond(struct qinst *inst, enum v3d_qpu_cond cond); void vir_set_pf(struct qinst *inst, enum v3d_qpu_pf pf); +void vir_set_uf(struct qinst *inst, enum v3d_qpu_uf uf); void vir_set_unpack(struct qinst *inst, int src, enum v3d_qpu_input_unpack unpack); struct qreg vir_get_temp(struct v3d_compile *c); void vir_emit_last_thrsw(struct v3d_compile *c); void vir_calculate_live_intervals(struct v3d_compile *c); -bool vir_has_implicit_uniform(struct qinst *inst); -int vir_get_implicit_uniform_src(struct qinst *inst); -int vir_get_non_sideband_nsrc(struct qinst *inst); int vir_get_nsrc(struct qinst *inst); bool vir_has_side_effects(struct v3d_compile *c, struct qinst *inst); bool vir_get_add_op(struct qinst *inst, enum v3d_qpu_add_op *op); @@ -739,8 +771,6 @@ bool vir_is_raw_mov(struct qinst *inst); bool vir_is_tex(struct qinst *inst); bool vir_is_add(struct qinst *inst); bool vir_is_mul(struct qinst *inst); -bool vir_is_float_input(struct qinst *inst); -bool vir_depends_on_flags(struct qinst *inst); bool vir_writes_r3(const struct v3d_device_info *devinfo, struct qinst *inst); bool vir_writes_r4(const struct v3d_device_info *devinfo, struct qinst *inst); struct qreg vir_follow_movs(struct v3d_compile *c, struct qreg reg); @@ -752,6 +782,7 @@ void vir_emit_thrsw(struct v3d_compile *c); void vir_dump(struct v3d_compile *c); void vir_dump_inst(struct v3d_compile *c, struct qinst *inst); +void vir_dump_uniform(enum quniform_contents contents, uint32_t data); void vir_validate(struct v3d_compile *c); @@ -761,17 +792,22 @@ bool vir_opt_constant_folding(struct v3d_compile *c); bool vir_opt_copy_propagate(struct v3d_compile *c); bool vir_opt_dead_code(struct v3d_compile *c); bool vir_opt_peephole_sf(struct v3d_compile *c); +bool vir_opt_redundant_flags(struct v3d_compile *c); bool vir_opt_small_immediates(struct v3d_compile *c); bool vir_opt_vpm(struct v3d_compile *c); void v3d_nir_lower_blend(nir_shader *s, struct v3d_compile *c); void v3d_nir_lower_io(nir_shader *s, struct v3d_compile *c); +void v3d_nir_lower_scratch(nir_shader *s); void v3d_nir_lower_txf_ms(nir_shader *s, struct v3d_compile *c); +void v3d_nir_lower_image_load_store(nir_shader *s); void vir_lower_uniforms(struct v3d_compile *c); void v3d33_vir_vpm_read_setup(struct v3d_compile *c, int num_components); void v3d33_vir_vpm_write_setup(struct v3d_compile *c); void v3d33_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr); void v3d40_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr); +void v3d40_vir_emit_image_load_store(struct v3d_compile *c, + nir_intrinsic_instr *instr); void v3d_vir_to_qpu(struct v3d_compile *c, struct qpu_reg *temp_registers); uint32_t v3d_qpu_schedule_instructions(struct v3d_compile *c); @@ -779,7 +815,7 @@ void qpu_validate(struct v3d_compile *c); struct qpu_reg *v3d_register_allocate(struct v3d_compile *c, bool *spilled); bool vir_init_reg_sets(struct v3d_compiler *compiler); -void vir_PF(struct v3d_compile *c, struct qreg src, enum v3d_qpu_pf pf); +bool v3d_gl_format_is_return_32(GLenum format); static inline bool quniform_contents_is_texture_p0(enum quniform_contents contents) @@ -789,6 +825,12 @@ quniform_contents_is_texture_p0(enum quniform_contents contents) V3D_MAX_TEXTURE_SAMPLERS)); } +static inline bool +vir_in_nonuniform_control_flow(struct v3d_compile *c) +{ + return c->execute.file != QFILE_NULL; +} + static inline struct qreg vir_uniform_ui(struct v3d_compile *c, uint32_t ui) { @@ -867,6 +909,33 @@ vir_##name(struct v3d_compile *c, struct qreg a, struct qreg b) \ a, b)); \ } +#define VIR_SFU(name) \ +static inline struct qreg \ +vir_##name(struct v3d_compile *c, struct qreg a) \ +{ \ + if (c->devinfo->ver >= 41) { \ + return vir_emit_def(c, vir_add_inst(V3D_QPU_A_##name, \ + c->undef, \ + a, c->undef)); \ + } else { \ + vir_FMOV_dest(c, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_##name), a); \ + return vir_FMOV(c, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_R4)); \ + } \ +} \ +static inline struct qinst * \ +vir_##name##_dest(struct v3d_compile *c, struct qreg dest, \ + struct qreg a) \ +{ \ + if (c->devinfo->ver >= 41) { \ + return vir_emit_nondef(c, vir_add_inst(V3D_QPU_A_##name, \ + dest, \ + a, c->undef)); \ + } else { \ + vir_FMOV_dest(c, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_##name), a); \ + return vir_FMOV_dest(c, dest, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_R4)); \ + } \ +} + #define VIR_A_ALU2(name) VIR_ALU2(name, vir_add_inst, V3D_QPU_A_##name) #define VIR_M_ALU2(name) VIR_ALU2(name, vir_mul_inst, V3D_QPU_M_##name) #define VIR_A_ALU1(name) VIR_ALU1(name, vir_add_inst, V3D_QPU_A_##name) @@ -900,18 +969,19 @@ VIR_A_ALU2(OR) VIR_A_ALU2(XOR) VIR_A_ALU2(VADD) VIR_A_ALU2(VSUB) -VIR_A_ALU2(STVPMV) +VIR_A_NODST_2(STVPMV) VIR_A_ALU1(NOT) VIR_A_ALU1(NEG) VIR_A_ALU1(FLAPUSH) VIR_A_ALU1(FLBPUSH) -VIR_A_ALU1(FLBPOP) +VIR_A_ALU1(FLPOP) VIR_A_ALU1(SETMSF) VIR_A_ALU1(SETREVF) VIR_A_ALU0(TIDX) VIR_A_ALU0(EIDX) VIR_A_ALU1(LDVPMV_IN) VIR_A_ALU1(LDVPMV_OUT) +VIR_A_ALU0(TMUWT) VIR_A_ALU0(FXCD) VIR_A_ALU0(XCD) @@ -919,6 +989,7 @@ VIR_A_ALU0(FYCD) VIR_A_ALU0(YCD) VIR_A_ALU0(MSF) VIR_A_ALU0(REVF) +VIR_A_ALU0(BARRIERID) VIR_A_NODST_1(VPMSETUP) VIR_A_NODST_0(VPMWT) VIR_A_ALU2(FCMP) @@ -948,6 +1019,13 @@ VIR_M_NODST_2(MULTOP) VIR_M_ALU1(MOV) VIR_M_ALU1(FMOV) +VIR_SFU(RECIP) +VIR_SFU(RSQRT) +VIR_SFU(EXP) +VIR_SFU(LOG) +VIR_SFU(SIN) +VIR_SFU(RSQRT2) + static inline struct qinst * vir_MOV_cond(struct v3d_compile *c, enum v3d_qpu_cond cond, struct qreg dest, struct qreg src) @@ -1021,10 +1099,10 @@ vir_LOAD_IMM_I2(struct v3d_compile *c, uint32_t val) */ static inline struct qinst * -vir_BRANCH(struct v3d_compile *c, enum v3d_qpu_cond cond) +vir_BRANCH(struct v3d_compile *c, enum v3d_qpu_branch_cond cond) { /* The actual uniform_data value will be set at scheduling time */ - return vir_emit_nondef(c, vir_branch_inst(cond, vir_uniform_ui(c, 0))); + return vir_emit_nondef(c, vir_branch_inst(c, cond)); } #define vir_for_each_block(block, c) \ @@ -1053,4 +1131,8 @@ vir_BRANCH(struct v3d_compile *c, enum v3d_qpu_cond cond) vir_for_each_block(_block, c) \ vir_for_each_inst(inst, _block) +#define vir_for_each_inst_inorder_safe(inst, c) \ + vir_for_each_block(_block, c) \ + vir_for_each_inst_safe(inst, _block) + #endif /* V3D_COMPILER_H */