#include "util/macros.h"
#include "common/v3d_debug.h"
+#include "common/v3d_device_info.h"
#include "compiler/nir/nir.h"
#include "util/list.h"
#include "util/u_math.h"
* or physical registers later.
*/
QFILE_TEMP,
- QFILE_VARY,
QFILE_UNIF,
QFILE_TLB,
QFILE_TLBU,
*/
struct qpu_reg {
bool magic;
+ bool smimm;
int index;
};
QUNIFORM_USER_CLIP_PLANE,
/**
- * A reference to a texture config parameter 0 uniform.
+ * A reference to a V3D 3.x texture config parameter 0 uniform.
*
* This is a uniform implicitly loaded with a QPU_W_TMU* write, which
* defines texture type, miplevels, and such. It will be found as a
QUNIFORM_TEXTURE_CONFIG_P0_32,
/**
- * A reference to a texture config parameter 1 uniform.
+ * A reference to a V3D 3.x texture config parameter 1 uniform.
*
* This is a uniform implicitly loaded with a QPU_W_TMU* write, which
* has the pointer to the indirect texture state. Our data[] field
*/
QUNIFORM_TEXTURE_CONFIG_P1,
+ /* A V3D 4.x texture config parameter. The high 8 bits will be
+ * which texture or sampler is being sampled, and the driver must
+ * replace the address field with the appropriate address.
+ */
+ QUNIFORM_TMU_CONFIG_P0,
+ QUNIFORM_TMU_CONFIG_P1,
+
QUNIFORM_TEXTURE_FIRST_LEVEL,
QUNIFORM_TEXTURE_WIDTH,
QUNIFORM_TEXRECT_SCALE_X,
QUNIFORM_TEXRECT_SCALE_Y,
- QUNIFORM_TEXTURE_BORDER_COLOR,
-
- QUNIFORM_STENCIL,
-
QUNIFORM_ALPHA_REF,
- QUNIFORM_SAMPLE_MASK,
+
+ /**
+ * Returns the offset of the scratch buffer for register spilling.
+ */
+ QUNIFORM_SPILL_OFFSET,
+ QUNIFORM_SPILL_SIZE_PER_THREAD,
};
+static inline uint32_t v3d_tmu_config_data_create(uint32_t unit, uint32_t value)
+{
+ return unit << 24 | value; /* Packs unit into bits 31:24 and ORs value in
+                             * below it. value is not masked here, so any
+                             * bit at or above bit 24 would alias the unit
+                             * field that v3d_tmu_config_data_get_unit()
+                             * reads back.
+                             * NOTE(review): consider masking, or asserting
+                             * value < (1 << 24) — confirm with callers. */
+}
+
+static inline uint32_t v3d_tmu_config_data_get_unit(uint32_t data)
+{
+ return data >> 24; /* Extracts the top 8 bits (31:24), i.e. the unit that
+                     * v3d_tmu_config_data_create() shifted into place. */
+}
+
+static inline uint32_t v3d_tmu_config_data_get_value(uint32_t data)
+{
+ return data & 0xffffff; /* Keeps the low 24 bits (23:0); the unit byte in
+                          * bits 31:24 is masked off. */
+}
+
struct v3d_varying_slot {
uint8_t slot_and_component;
};
uint8_t swizzle[4];
uint8_t return_size;
uint8_t return_channels;
- union {
- struct {
- unsigned compare_mode:1;
- unsigned compare_func:3;
- bool clamp_s:1;
- bool clamp_t:1;
- bool clamp_r:1;
- };
- struct {
- uint16_t msaa_width, msaa_height;
- };
- };
+ bool clamp_s:1;
+ bool clamp_t:1;
+ bool clamp_r:1;
} tex[V3D_MAX_TEXTURE_SAMPLERS];
uint8_t ucp_enables;
};
uint8_t swap_color_rb;
/* Mask of which render targets need to be written as 32-bit floats */
uint8_t f32_color_rb;
+ /* Masks of which render targets need to be written as ints/uints.
+ * Used by gallium to work around lost information in TGSI.
+ */
+ uint8_t int_color_rb;
+ uint8_t uint_color_rb;
uint8_t alpha_test_func;
uint8_t logicop_func;
uint32_t point_sprite_mask;
/** @} */
};
+/** Which util/list.h add mode we should use when inserting an instruction. */
+enum vir_cursor_mode {
+ vir_cursor_add, /* selected by vir_after_inst() and vir_before_block() */
+ vir_cursor_addtail, /* selected by vir_before_inst() and vir_after_block() */
+};
+
+/**
+ * Tracking structure for where new instructions should be inserted. Create
+ * with one of the vir_after_inst()-style helper functions.
+ *
+ * This does not protect against removal of the block or instruction, so we
+ * have an assert in instruction removal to try to catch it.
+ */
+struct vir_cursor {
+ enum vir_cursor_mode mode; /* which list add operation to apply at link */
+ struct list_head *link; /* anchor node: either an instruction's own link
+                          * or a block's instructions list head, depending
+                          * on which helper built the cursor */
+};
+
+static inline struct vir_cursor
+vir_before_inst(struct qinst *inst)
+{
+ return (struct vir_cursor){ vir_cursor_addtail, &inst->link }; /* Cursor for inserting before inst: addtail mode anchored on inst's own link.
+                                                                   * NOTE(review): relies on the list addtail helper placing the new node just ahead of the anchor — confirm against util/list.h. */
+}
+
+static inline struct vir_cursor
+vir_after_inst(struct qinst *inst)
+{
+ return (struct vir_cursor){ vir_cursor_add, &inst->link }; /* Cursor for inserting after inst: add mode anchored on inst's own link.
+                                                               * NOTE(review): relies on the list add helper placing the new node right behind the anchor — confirm against util/list.h. */
+}
+
+static inline struct vir_cursor
+vir_before_block(struct qblock *block)
+{
+ return (struct vir_cursor){ vir_cursor_add, &block->instructions }; /* Cursor for the start of block: add mode anchored on the block's instructions list head. */
+}
+
+static inline struct vir_cursor
+vir_after_block(struct qblock *block)
+{
+ return (struct vir_cursor){ vir_cursor_addtail, &block->instructions }; /* Cursor for the end of block: addtail mode anchored on the block's instructions list head. */
+}
+
/**
* Compiler state saved across compiler invocations, for any expensive global
* setup.
struct exec_list *cf_node_list;
const struct v3d_compiler *compiler;
+ void (*debug_output)(const char *msg,
+ void *debug_output_data);
+ void *debug_output_data;
+
/**
* Mapping from nir_register * or nir_ssa_def * to array of struct
* qreg for the values.
*/
uint32_t flat_shade_flags[BITSET_WORDS(V3D_MAX_FS_INPUTS)];
+ uint32_t noperspective_flags[BITSET_WORDS(V3D_MAX_FS_INPUTS)];
+
+ uint32_t centroid_flags[BITSET_WORDS(V3D_MAX_FS_INPUTS)];
+
+ bool uses_center_w;
+
struct v3d_ubo_range *ubo_ranges;
bool *ubo_range_used;
uint32_t ubo_ranges_array_size;
uint8_t vattr_sizes[V3D_MAX_VS_INPUTS];
uint32_t num_vpm_writes;
+ /* Size in bytes of registers that have been spilled. This is how much
+ * space needs to be available in the spill BO per thread per QPU.
+ */
+ uint32_t spill_size;
+ /* Shader-db stats */
+ uint32_t spills, fills, loops;
+ /**
+ * Register spilling's per-thread base address, shared between each
+ * spill/fill's addressing calculations.
+ */
+ struct qreg spill_base;
+ /* Bit vector of which temps may be spilled */
+ BITSET_WORD *spillable;
+
/**
* Array of the VARYING_SLOT_* of all FS QFILE_VARY reads.
*
/* Live ranges of temps. */
int *temp_start, *temp_end;
+ bool live_intervals_valid;
uint32_t *uniform_data;
enum quniform_contents *uniform_contents;
struct qreg undef;
uint32_t num_temps;
+ struct vir_cursor cursor;
struct list_head blocks;
int next_block_index;
struct qblock *cur_block;
struct v3d_ubo_range *ubo_ranges;
uint32_t num_ubo_ranges;
uint32_t ubo_size;
+ uint32_t spill_size;
uint8_t num_inputs;
uint8_t threads;
/* Total number of components written, for the shader state record. */
uint32_t vpm_output_size;
+
+ /* Set if there should be separate VPM segments for input and output.
+ * If unset, vpm_input_size will be 0.
+ */
+ bool separate_segments;
+
+ /* Value to be programmed in VCM_CACHE_SIZE. */
+ uint8_t vcm_cache_size;
};
struct v3d_fs_prog_data {
*/
uint32_t flat_shade_flags[((V3D_MAX_FS_INPUTS - 1) / 24) + 1];
+ uint32_t noperspective_flags[((V3D_MAX_FS_INPUTS - 1) / 24) + 1];
+
+ uint32_t centroid_flags[((V3D_MAX_FS_INPUTS - 1) / 24) + 1];
+
bool writes_z;
bool discard;
+ bool uses_center_w;
};
/* Special nir_load_input intrinsic index for loading the current TLB
struct v3d_vs_key *key,
struct v3d_vs_prog_data *prog_data,
nir_shader *s,
+ void (*debug_output)(const char *msg,
+ void *debug_output_data),
+ void *debug_output_data,
int program_id, int variant_id,
uint32_t *final_assembly_size);
struct v3d_fs_key *key,
struct v3d_fs_prog_data *prog_data,
nir_shader *s,
+ void (*debug_output)(const char *msg,
+ void *debug_output_data),
+ void *debug_output_data,
int program_id, int variant_id,
uint32_t *final_assembly_size);
struct qinst *vir_emit_nondef(struct v3d_compile *c, struct qinst *inst);
void vir_set_cond(struct qinst *inst, enum v3d_qpu_cond cond);
void vir_set_pf(struct qinst *inst, enum v3d_qpu_pf pf);
+void vir_set_uf(struct qinst *inst, enum v3d_qpu_uf uf);
void vir_set_unpack(struct qinst *inst, int src,
enum v3d_qpu_input_unpack unpack);
struct qreg vir_get_temp(struct v3d_compile *c);
+void vir_emit_last_thrsw(struct v3d_compile *c);
void vir_calculate_live_intervals(struct v3d_compile *c);
bool vir_has_implicit_uniform(struct qinst *inst);
int vir_get_implicit_uniform_src(struct qinst *inst);
bool vir_is_add(struct qinst *inst);
bool vir_is_mul(struct qinst *inst);
bool vir_is_float_input(struct qinst *inst);
-bool vir_depends_on_flags(struct qinst *inst);
bool vir_writes_r3(const struct v3d_device_info *devinfo, struct qinst *inst);
bool vir_writes_r4(const struct v3d_device_info *devinfo, struct qinst *inst);
struct qreg vir_follow_movs(struct v3d_compile *c, struct qreg reg);
uint8_t vir_channels_written(struct qinst *inst);
+struct qreg ntq_get_src(struct v3d_compile *c, nir_src src, int i);
+void ntq_store_dest(struct v3d_compile *c, nir_dest *dest, int chan,
+ struct qreg result);
+void vir_emit_thrsw(struct v3d_compile *c);
void vir_dump(struct v3d_compile *c);
void vir_dump_inst(struct v3d_compile *c, struct qinst *inst);
+void vir_dump_uniform(enum quniform_contents contents, uint32_t data);
void vir_validate(struct v3d_compile *c);
void v3d_nir_lower_txf_ms(nir_shader *s, struct v3d_compile *c);
void vir_lower_uniforms(struct v3d_compile *c);
+void v3d33_vir_vpm_read_setup(struct v3d_compile *c, int num_components);
+void v3d33_vir_vpm_write_setup(struct v3d_compile *c);
+void v3d33_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr);
+void v3d40_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr);
+
void v3d_vir_to_qpu(struct v3d_compile *c, struct qpu_reg *temp_registers);
uint32_t v3d_qpu_schedule_instructions(struct v3d_compile *c);
void qpu_validate(struct v3d_compile *c);
-struct qpu_reg *v3d_register_allocate(struct v3d_compile *c);
+struct qpu_reg *v3d_register_allocate(struct v3d_compile *c, bool *spilled);
bool vir_init_reg_sets(struct v3d_compiler *compiler);
void vir_PF(struct v3d_compile *c, struct qreg src, enum v3d_qpu_pf pf);
a, b)); \
}
+#define VIR_SFU(name) /* Defines vir_<name>(c, a), returning a qreg, and vir_<name>_dest(c, dest, a), returning the emitted qinst. */ \
+static inline struct qreg \
+vir_##name(struct v3d_compile *c, struct qreg a) \
+{ \
+ if (c->devinfo->ver >= 41) { /* ver >= 41: emit V3D_QPU_A_<name> as a single add instruction */ \
+ return vir_emit_def(c, vir_add_inst(V3D_QPU_A_##name, \
+ c->undef, \
+ a, c->undef)); \
+ } else { /* older versions: FMOV the operand to the magic V3D_QPU_WADDR_<name> register, then FMOV the result out of magic R4 */ \
+ vir_FMOV_dest(c, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_##name), a); \
+ return vir_FMOV(c, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_R4)); \
+ } \
+} \
+static inline struct qinst * \
+vir_##name##_dest(struct v3d_compile *c, struct qreg dest, \
+ struct qreg a) \
+{ \
+ if (c->devinfo->ver >= 41) { /* ver >= 41: same add instruction, writing the caller's dest */ \
+ return vir_emit_nondef(c, vir_add_inst(V3D_QPU_A_##name, \
+ dest, \
+ a, c->undef)); \
+ } else { /* older versions: same two-step sequence; the returned qinst is the final FMOV from R4 into dest */ \
+ vir_FMOV_dest(c, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_##name), a); \
+ return vir_FMOV_dest(c, dest, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_R4)); \
+ } \
+}
+
#define VIR_A_ALU2(name) VIR_ALU2(name, vir_add_inst, V3D_QPU_A_##name)
#define VIR_M_ALU2(name) VIR_ALU2(name, vir_mul_inst, V3D_QPU_M_##name)
#define VIR_A_ALU1(name) VIR_ALU1(name, vir_add_inst, V3D_QPU_A_##name)
VIR_A_ALU2(XOR)
VIR_A_ALU2(VADD)
VIR_A_ALU2(VSUB)
-VIR_A_ALU2(STVPMV)
+VIR_A_NODST_2(STVPMV)
VIR_A_ALU1(NOT)
VIR_A_ALU1(NEG)
VIR_A_ALU1(FLAPUSH)
VIR_A_ALU1(FLBPUSH)
-VIR_A_ALU1(FLBPOP)
+VIR_A_ALU1(FLPOP)
VIR_A_ALU1(SETMSF)
VIR_A_ALU1(SETREVF)
-VIR_A_ALU1(TIDX)
-VIR_A_ALU1(EIDX)
+VIR_A_ALU0(TIDX)
+VIR_A_ALU0(EIDX)
VIR_A_ALU1(LDVPMV_IN)
VIR_A_ALU1(LDVPMV_OUT)
+VIR_A_ALU0(TMUWT)
VIR_A_ALU0(FXCD)
VIR_A_ALU0(XCD)
VIR_M_ALU1(MOV)
VIR_M_ALU1(FMOV)
+VIR_SFU(RECIP)
+VIR_SFU(RSQRT)
+VIR_SFU(EXP)
+VIR_SFU(LOG)
+VIR_SFU(SIN)
+VIR_SFU(RSQRT2)
+
static inline struct qinst *
vir_MOV_cond(struct v3d_compile *c, enum v3d_qpu_cond cond,
struct qreg dest, struct qreg src)
return vir_emit_nondef(c, vir_add_inst(V3D_QPU_A_NOP,
c->undef, c->undef, c->undef));
}
+
+static inline struct qreg
+vir_LDTMU(struct v3d_compile *c)
+{
+ if (c->devinfo->ver >= 41) { /* ver >= 41: the NOP carrying the ldtmu signal is emitted with vir_emit_def() and its qreg is the result */
+ struct qinst *ldtmu = vir_add_inst(V3D_QPU_A_NOP, c->undef,
+ c->undef, c->undef);
+ ldtmu->qpu.sig.ldtmu = true;
+
+ return vir_emit_def(c, ldtmu);
+ } else { /* older versions: set the ldtmu signal on an emitted NOP, then MOV the value out of magic R4 */
+ vir_NOP(c)->qpu.sig.ldtmu = true;
+ return vir_MOV(c, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_R4));
+ }
+}
+
+static inline struct qreg
+vir_UMUL(struct v3d_compile *c, struct qreg src0, struct qreg src1)
+{
+ vir_MULTOP(c, src0, src1); /* Emitted first on the same operands; its own result is not used here.
+                              * NOTE(review): presumably sets up state that the following UMUL24 consumes — confirm against the QPU ISA notes. */
+ return vir_UMUL24(c, src0, src1); /* The value handed back to the caller is the UMUL24 result. */
+}
+
/*
static inline struct qreg
vir_LOAD_IMM(struct v3d_compile *c, uint32_t val)
*/
static inline struct qinst *
-vir_BRANCH(struct v3d_compile *c, enum v3d_qpu_cond cond)
+vir_BRANCH(struct v3d_compile *c, enum v3d_qpu_branch_cond cond)
{
/* The actual uniform_data value will be set at scheduling time */
return vir_emit_nondef(c, vir_branch_inst(cond, vir_uniform_ui(c, 0)));