X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fbroadcom%2Fcompiler%2Fv3d_compiler.h;h=70edeed273009fe62f8233aa9b8b3caf88d446a9;hb=f2c0d310d6efe560de8192ab468ba02d50c9ac1e;hp=fdf1b131978fae420ba3b5776690a089789e82e1;hpb=d721348dcdb3658572c5952563d1f4d1ca0321af;p=mesa.git diff --git a/src/broadcom/compiler/v3d_compiler.h b/src/broadcom/compiler/v3d_compiler.h index fdf1b131978..70edeed2730 100644 --- a/src/broadcom/compiler/v3d_compiler.h +++ b/src/broadcom/compiler/v3d_compiler.h @@ -115,6 +115,7 @@ static inline struct qreg vir_reg(enum qfile file, uint32_t index) */ struct qpu_reg { bool magic; + bool smimm; int index; }; @@ -244,10 +245,14 @@ enum quniform_contents { QUNIFORM_TEXTURE_BORDER_COLOR, - QUNIFORM_STENCIL, - QUNIFORM_ALPHA_REF, QUNIFORM_SAMPLE_MASK, + + /** + * Returns the the offset of the scratch buffer for register spilling. + */ + QUNIFORM_SPILL_OFFSET, + QUNIFORM_SPILL_SIZE_PER_THREAD, }; struct v3d_varying_slot { @@ -296,18 +301,11 @@ struct v3d_key { uint8_t swizzle[4]; uint8_t return_size; uint8_t return_channels; - union { - struct { - unsigned compare_mode:1; - unsigned compare_func:3; - bool clamp_s:1; - bool clamp_t:1; - bool clamp_r:1; - }; - struct { - uint16_t msaa_width, msaa_height; - }; - }; + unsigned compare_mode:1; + unsigned compare_func:3; + bool clamp_s:1; + bool clamp_t:1; + bool clamp_r:1; } tex[V3D_MAX_TEXTURE_SAMPLERS]; uint8_t ucp_enables; }; @@ -330,6 +328,11 @@ struct v3d_fs_key { uint8_t swap_color_rb; /* Mask of which render targets need to be written as 32-bit floats */ uint8_t f32_color_rb; + /* Masks of which render targets need to be written as ints/uints. + * Used by gallium to work around lost information in TGSI. + */ + uint8_t int_color_rb; + uint8_t uint_color_rb; uint8_t alpha_test_func; uint8_t logicop_func; uint32_t point_sprite_mask; @@ -474,6 +477,12 @@ struct v3d_compile { */ uint32_t flat_shade_flags[BITSET_WORDS(V3D_MAX_FS_INPUTS)]; + uint32_t noperspective_flags[BITSET_WORDS(V3D_MAX_FS_INPUTS)]; + + uint32_t centroid_flags[BITSET_WORDS(V3D_MAX_FS_INPUTS)]; + + bool uses_center_w; + struct v3d_ubo_range *ubo_ranges; bool *ubo_range_used; uint32_t ubo_ranges_array_size; @@ -506,6 +515,20 @@ struct v3d_compile { uint8_t vattr_sizes[V3D_MAX_VS_INPUTS]; uint32_t num_vpm_writes; + /* Size in bytes of registers that have been spilled. This is how much + * space needs to be available in the spill BO per thread per QPU. + */ + uint32_t spill_size; + /* Shader-db stats for register spilling. */ + uint32_t spills, fills; + /** + * Register spilling's per-thread base address, shared between each + * spill/fill's addressing calculations. + */ + struct qreg spill_base; + /* Bit vector of which temps may be spilled */ + BITSET_WORD *spillable; + /** * Array of the VARYING_SLOT_* of all FS QFILE_VARY reads. * @@ -528,6 +551,7 @@ struct v3d_compile { /* Live ranges of temps. */ int *temp_start, *temp_end; + bool live_intervals_valid; uint32_t *uniform_data; enum quniform_contents *uniform_contents; @@ -600,6 +624,7 @@ struct v3d_prog_data { struct v3d_ubo_range *ubo_ranges; uint32_t num_ubo_ranges; uint32_t ubo_size; + uint32_t spill_size; uint8_t num_inputs; uint8_t threads; @@ -637,8 +662,13 @@ struct v3d_fs_prog_data { */ uint32_t flat_shade_flags[((V3D_MAX_FS_INPUTS - 1) / 24) + 1]; + uint32_t noperspective_flags[((V3D_MAX_FS_INPUTS - 1) / 24) + 1]; + + uint32_t centroid_flags[((V3D_MAX_FS_INPUTS - 1) / 24) + 1]; + bool writes_z; bool discard; + bool uses_center_w; }; /* Special nir_load_input intrinsic index for loading the current TLB @@ -697,6 +727,7 @@ void vir_set_unpack(struct qinst *inst, int src, enum v3d_qpu_input_unpack unpack); struct qreg vir_get_temp(struct v3d_compile *c); +void vir_emit_last_thrsw(struct v3d_compile *c); void vir_calculate_live_intervals(struct v3d_compile *c); bool vir_has_implicit_uniform(struct qinst *inst); int vir_get_implicit_uniform_src(struct qinst *inst); @@ -746,7 +777,7 @@ void v3d40_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr); void v3d_vir_to_qpu(struct v3d_compile *c, struct qpu_reg *temp_registers); uint32_t v3d_qpu_schedule_instructions(struct v3d_compile *c); void qpu_validate(struct v3d_compile *c); -struct qpu_reg *v3d_register_allocate(struct v3d_compile *c); +struct qpu_reg *v3d_register_allocate(struct v3d_compile *c, bool *spilled); bool vir_init_reg_sets(struct v3d_compiler *compiler); void vir_PF(struct v3d_compile *c, struct qreg src, enum v3d_qpu_pf pf); @@ -837,6 +868,33 @@ vir_##name(struct v3d_compile *c, struct qreg a, struct qreg b) \ a, b)); \ } +#define VIR_SFU(name) \ +static inline struct qreg \ +vir_##name(struct v3d_compile *c, struct qreg a) \ +{ \ + if (c->devinfo->ver >= 41) { \ + return vir_emit_def(c, vir_add_inst(V3D_QPU_A_##name, \ + c->undef, \ + a, c->undef)); \ + } else { \ + vir_FMOV_dest(c, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_##name), a); \ + return vir_FMOV(c, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_R4)); \ + } \ +} \ +static inline struct qinst * \ +vir_##name##_dest(struct v3d_compile *c, struct qreg dest, \ + struct qreg a) \ +{ \ + if (c->devinfo->ver >= 41) { \ + return vir_emit_nondef(c, vir_add_inst(V3D_QPU_A_##name, \ + dest, \ + a, c->undef)); \ + } else { \ + vir_FMOV_dest(c, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_##name), a); \ + return vir_FMOV_dest(c, dest, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_R4)); \ + } \ +} + #define VIR_A_ALU2(name) VIR_ALU2(name, vir_add_inst, V3D_QPU_A_##name) #define VIR_M_ALU2(name) VIR_ALU2(name, vir_mul_inst, V3D_QPU_M_##name) #define VIR_A_ALU1(name) VIR_ALU1(name, vir_add_inst, V3D_QPU_A_##name) @@ -870,18 +928,19 @@ VIR_A_ALU2(OR) VIR_A_ALU2(XOR) VIR_A_ALU2(VADD) VIR_A_ALU2(VSUB) -VIR_A_ALU2(STVPMV) +VIR_A_NODST_2(STVPMV) VIR_A_ALU1(NOT) VIR_A_ALU1(NEG) VIR_A_ALU1(FLAPUSH) VIR_A_ALU1(FLBPUSH) -VIR_A_ALU1(FLBPOP) +VIR_A_ALU1(FLPOP) VIR_A_ALU1(SETMSF) VIR_A_ALU1(SETREVF) VIR_A_ALU0(TIDX) VIR_A_ALU0(EIDX) VIR_A_ALU1(LDVPMV_IN) VIR_A_ALU1(LDVPMV_OUT) +VIR_A_ALU0(TMUWT) VIR_A_ALU0(FXCD) VIR_A_ALU0(XCD) @@ -918,6 +977,13 @@ VIR_M_NODST_2(MULTOP) VIR_M_ALU1(MOV) VIR_M_ALU1(FMOV) +VIR_SFU(RECIP) +VIR_SFU(RSQRT) +VIR_SFU(EXP) +VIR_SFU(LOG) +VIR_SFU(SIN) +VIR_SFU(RSQRT2) + static inline struct qinst * vir_MOV_cond(struct v3d_compile *c, enum v3d_qpu_cond cond, struct qreg dest, struct qreg src)