1 : 0]));
}
+ /* The spec says that for atomics, the TYPE field is ignored, but that
+ * doesn't seem to be the case for CMPXCHG. Just use the number of
+ * tmud writes we did to decide the type (or choose "32bit" for atomic
+ * reads, which has been fine).
+ */
+ int num_components;
+ if (tmu_op == GENERAL_TMU_WRITE_OP_ATOMIC_CMPXCHG)
+ num_components = 2;
+ else
+ num_components = instr->num_components;
+
uint32_t config = (0xffffff00 |
tmu_op |
GENERAL_TMU_LOOKUP_PER_PIXEL);
- if (instr->num_components == 1) {
+ if (num_components == 1) {
config |= GENERAL_TMU_LOOKUP_TYPE_32BIT_UI;
} else {
- config |= (GENERAL_TMU_LOOKUP_TYPE_VEC2 +
- instr->num_components - 2);
+ config |= GENERAL_TMU_LOOKUP_TYPE_VEC2 + num_components - 2;
}
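
For reference, the VEC2/VEC3/VEC4 lookup types are consecutive values, which is what makes the `GENERAL_TMU_LOOKUP_TYPE_VEC2 + num_components - 2` arithmetic work. A standalone sketch of the selection logic, with placeholder constant values rather than the real register encoding:

    /* Sketch of the lookup-type selection above. The constant values are
     * placeholders for illustration; the only property the real encoding
     * needs is that VEC2/VEC3/VEC4 are consecutive.
     */
    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    #define GENERAL_TMU_LOOKUP_TYPE_32BIT_UI 15 /* assumed value */
    #define GENERAL_TMU_LOOKUP_TYPE_VEC2     16 /* assumed value */

    static uint32_t
    lookup_type(int num_components)
    {
            assert(num_components >= 1 && num_components <= 4);
            if (num_components == 1)
                    return GENERAL_TMU_LOOKUP_TYPE_32BIT_UI;
            /* VEC3 and VEC4 follow VEC2, so offset from VEC2. */
            return GENERAL_TMU_LOOKUP_TYPE_VEC2 + num_components - 2;
    }

    int
    main(void)
    {
            for (int n = 1; n <= 4; n++)
                    printf("%d components -> lookup type %u\n",
                           n, lookup_type(n));
            return 0;
    }
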
if (vir_in_nonuniform_control_flow(c)) {
struct qreg result;
switch (instr->op) {
- case nir_op_fmov:
- case nir_op_imov:
+ case nir_op_mov:
result = vir_MOV(c, src[0]);
break;
v3d_optimize_nir(struct nir_shader *s)
{
bool progress;
+ unsigned lower_flrp =
+ (s->options->lower_flrp16 ? 16 : 0) |
+ (s->options->lower_flrp32 ? 32 : 0) |
+ (s->options->lower_flrp64 ? 64 : 0);
do {
progress = false;
NIR_PASS_V(s, nir_lower_vars_to_ssa);
- NIR_PASS(progress, s, nir_lower_alu_to_scalar);
+ NIR_PASS(progress, s, nir_lower_alu_to_scalar, NULL);
NIR_PASS(progress, s, nir_lower_phis_to_scalar);
NIR_PASS(progress, s, nir_copy_prop);
NIR_PASS(progress, s, nir_opt_remove_phis);
NIR_PASS(progress, s, nir_opt_peephole_select, 8, true, true);
NIR_PASS(progress, s, nir_opt_algebraic);
NIR_PASS(progress, s, nir_opt_constant_folding);
+
+ if (lower_flrp != 0) {
+ bool lower_flrp_progress = false;
+
+ NIR_PASS(lower_flrp_progress, s, nir_lower_flrp,
+ lower_flrp,
+ false /* always_precise */,
+ s->options->lower_ffma);
+ if (lower_flrp_progress) {
+ NIR_PASS(progress, s, nir_opt_constant_folding);
+ progress = true;
+ }
+
+ /* Nothing should rematerialize any flrps, so we only
+ * need to do this lowering once.
+ */
+ lower_flrp = 0;
+ }
+
NIR_PASS(progress, s, nir_opt_undef);
} while (progress);
}
}
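
The `lower_flrp` value is a bitmask of bit sizes, and since 16/32/64 are distinct powers of two the OR above always produces a well-formed mask. A minimal sketch of that mask construction, using a stand-in options struct rather than the real nir_shader_compiler_options:

    /* Stand-in for the flrp bit-size mask built in v3d_optimize_nir():
     * each bit size the backend wants lowered contributes its size to
     * the mask.
     */
    #include <stdbool.h>
    #include <stdio.h>

    struct options {
            bool lower_flrp16, lower_flrp32, lower_flrp64;
    };

    static unsigned
    flrp_mask(const struct options *o)
    {
            return (o->lower_flrp16 ? 16 : 0) |
                   (o->lower_flrp32 ? 32 : 0) |
                   (o->lower_flrp64 ? 64 : 0);
    }

    int
    main(void)
    {
            struct options o = { .lower_flrp32 = true };
            printf("mask = 0x%x\n", flrp_mask(&o)); /* 0x20: 32-bit only */
            return 0;
    }

Computing the mask once before the loop and zeroing it after the first lowering pass is what guarantees nir_lower_flrp runs at most once per shader.
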
+static bool
+var_needs_point_coord(struct v3d_compile *c, nir_variable *var)
+{
+ return (var->data.location == VARYING_SLOT_PNTC ||
+ (var->data.location >= VARYING_SLOT_VAR0 &&
+ (c->fs_key->point_sprite_mask &
+ (1 << (var->data.location - VARYING_SLOT_VAR0)))));
+}
+
+static bool
+program_reads_point_coord(struct v3d_compile *c)
+{
+ nir_foreach_variable(var, &c->s->inputs) {
+ if (var_needs_point_coord(c, var))
+ return true;
+ }
+
+ return false;
+}
+
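
var_needs_point_coord() reduces to a bit test against point_sprite_mask for generic varyings: slot VARYING_SLOT_VAR0 + n is a sprite coordinate iff bit n of the mask is set. A worked example of that test, with the VARYING_SLOT_VAR0 value assumed for illustration:

    /* Worked example of the point_sprite_mask bit test. */
    #include <stdbool.h>
    #include <stdio.h>

    #define VARYING_SLOT_VAR0 32 /* assumed base of the generic slots */

    static bool
    slot_is_sprite_coord(unsigned location, unsigned point_sprite_mask)
    {
            if (location < VARYING_SLOT_VAR0)
                    return false;
            return point_sprite_mask & (1u << (location - VARYING_SLOT_VAR0));
    }

    int
    main(void)
    {
            unsigned mask = (1u << 0) | (1u << 3); /* VAR0 and VAR3 */
            printf("%d\n", slot_is_sprite_coord(VARYING_SLOT_VAR0 + 3, mask)); /* 1 */
            printf("%d\n", slot_is_sprite_coord(VARYING_SLOT_VAR0 + 1, mask)); /* 0 */
            return 0;
    }
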
static void
ntq_setup_fs_inputs(struct v3d_compile *c)
{
if (var->data.location == VARYING_SLOT_POS) {
emit_fragcoord_input(c, loc);
- } else if (var->data.location == VARYING_SLOT_PNTC ||
- (var->data.location >= VARYING_SLOT_VAR0 &&
- (c->fs_key->point_sprite_mask &
- (1 << (var->data.location -
- VARYING_SLOT_VAR0))))) {
+ } else if (var_needs_point_coord(c, var)) {
c->inputs[loc * 4 + 0] = c->point_x;
c->inputs[loc * 4 + 1] = c->point_y;
} else {
break;
case nir_intrinsic_load_input:
+ /* XXX perf: Use ldvpmv (uniform offset) or ldvpmd (non-uniform
+ * offset) and enable PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR.
+ */
offset = (nir_intrinsic_base(instr) +
nir_src_as_uint(instr->src[0]));
if (c->s->info.stage != MESA_SHADER_FRAGMENT &&
break;
case nir_intrinsic_store_output:
+ /* XXX perf: Use stvpmv with uniform non-constant offsets and
+ * stvpmd with non-uniform offsets and enable
+ * PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR.
+ */
if (c->s->info.stage == MESA_SHADER_FRAGMENT) {
offset = ((nir_intrinsic_base(instr) +
nir_src_as_uint(instr->src[1])) * 4 +
case nir_intrinsic_memory_barrier_atomic_counter:
case nir_intrinsic_memory_barrier_buffer:
case nir_intrinsic_memory_barrier_image:
+ case nir_intrinsic_memory_barrier_shared:
case nir_intrinsic_group_memory_barrier:
/* We don't do any instruction scheduling of these NIR
* instructions between each other, so we just need to make
c->payload_w_centroid = vir_MOV(c, vir_reg(QFILE_REG, 1));
c->payload_z = vir_MOV(c, vir_reg(QFILE_REG, 2));
- /* XXX perf: We could set the "disable implicit point/line
- * varyings" field in the shader record and not emit these, if
- * they're not going to be used.
+ /* V3D 4.x can disable the implicit point/line varyings, so only
+ * emit them when they are going to be used.
+ */
- if (c->fs_key->is_points) {
+ if (c->fs_key->is_points &&
+ (c->devinfo->ver < 40 || program_reads_point_coord(c))) {
c->point_x = emit_fragment_varying(c, NULL, 0, 0);
c->point_y = emit_fragment_varying(c, NULL, 0, 0);
- } else if (c->fs_key->is_lines) {
+ c->uses_implicit_point_line_varyings = true;
+ } else if (c->fs_key->is_lines && c->devinfo->ver < 40) {
c->line_x = emit_fragment_varying(c, NULL, 0, 0);
+ c->uses_implicit_point_line_varyings = true;
}
break;
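
The new uses_implicit_point_line_varyings flag is presumably consumed when packing the V3D 4.x shader state record. A hypothetical sketch of that consumer, with illustrative struct and field names rather than the real packet definitions:

    /* Hypothetical consumer of the flag set above: when packing the
     * V3D 4.x shader state record, the driver can disable the implicit
     * varyings whenever the compiled FS never reads them.
     */
    #include <stdbool.h>
    #include <stdio.h>

    struct fs_prog_data {
            bool uses_implicit_point_line_varyings;
    };

    struct shader_record {
            bool disable_implicit_point_line_varyings;
    };

    static void
    pack_shader_record(struct shader_record *rec, const struct fs_prog_data *fs)
    {
            rec->disable_implicit_point_line_varyings =
                    !fs->uses_implicit_point_line_varyings;
    }

    int
    main(void)
    {
            struct fs_prog_data fs = { .uses_implicit_point_line_varyings = false };
            struct shader_record rec;
            pack_shader_record(&rec, &fs);
            printf("disable_implicit = %d\n",
                   rec.disable_implicit_point_line_varyings);
            return 0;
    }
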
case MESA_SHADER_COMPUTE:
V3D_QPU_WADDR_SYNC));
}
- if (c->s->info.system_values_read &
- ((1ull << SYSTEM_VALUE_LOCAL_INVOCATION_INDEX) |
- (1ull << SYSTEM_VALUE_WORK_GROUP_ID))) {
- c->cs_payload[0] = vir_MOV(c, vir_reg(QFILE_REG, 0));
- }
- if ((c->s->info.system_values_read &
- ((1ull << SYSTEM_VALUE_WORK_GROUP_ID))) ||
- c->s->info.cs.shared_size) {
- c->cs_payload[1] = vir_MOV(c, vir_reg(QFILE_REG, 2));
- }
+ c->cs_payload[0] = vir_MOV(c, vir_reg(QFILE_REG, 0));
+ c->cs_payload[1] = vir_MOV(c, vir_reg(QFILE_REG, 2));
/* Set up the division between gl_LocalInvocationIndex and
* wg_in_mem in the payload reg.
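
As a toy illustration of the "division" the comment describes: once the split point within the payload register is configured, extracting the two values is a mask and a shift. The 8-bit split point below is an assumption for the example, not the hardware layout:

    /* Toy split of one payload word into gl_LocalInvocationIndex (low
     * bits) and wg_in_mem (high bits); the split point is illustrative.
     */
    #include <stdint.h>
    #include <stdio.h>

    #define LOCAL_INDEX_BITS 8 /* assumed split point */

    int
    main(void)
    {
            uint32_t payload = (3u << LOCAL_INDEX_BITS) | 17u;
            uint32_t local_index = payload & ((1u << LOCAL_INDEX_BITS) - 1);
            uint32_t wg_in_mem = payload >> LOCAL_INDEX_BITS;
            printf("local_index=%u wg_in_mem=%u\n", local_index, wg_in_mem);
            return 0;
    }
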
.lower_bit_count = true,
.lower_cs_local_id_from_index = true,
.lower_ffract = true,
+ .lower_fmod = true,
.lower_pack_unorm_2x16 = true,
.lower_pack_snorm_2x16 = true,
.lower_pack_unorm_4x8 = true,
.lower_ldexp = true,
.lower_mul_high = true,
.lower_wpos_pntc = true,
- .native_integers = true,
};
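
Setting .lower_fmod asks NIR's algebraic pass to open-code fmod from ops the backend already supports, as fmod(a, b) = a - b * floor(a / b). A numeric sketch of that identity (note it gives GLSL mod() semantics, which differ from C's fmodf() for negative operands):

    /* Numeric sketch of the .lower_fmod lowering. */
    #include <math.h>
    #include <stdio.h>

    static float
    lowered_fmod(float a, float b)
    {
            return a - b * floorf(a / b);
    }

    int
    main(void)
    {
            printf("%f\n", lowered_fmod(7.5f, 2.0f));  /* 1.5 */
            printf("%f\n", lowered_fmod(-7.5f, 2.0f)); /* 0.5, not C's -1.5 */
            return 0;
    }
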
/**