*/
#include <inttypes.h>
-#include "util/u_format.h"
+#include "util/format/u_format.h"
+#include "util/u_helpers.h"
#include "util/u_math.h"
#include "util/u_memory.h"
#include "util/ralloc.h"
return qregs;
}
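+/* Returns true if the instruction carries one of the QPU "load" signals
+ * (ldunif, ldtmu, ldvary, ...). The implicit register write performed by
+ * these signals cannot be predicated, so their results need an extra MOV
+ * before they can be stored under non-uniform control flow (see
+ * ntq_store_dest()).
+ */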
+static bool
+is_ld_signal(const struct v3d_qpu_sig *sig)
+{
+ return (sig->ldunif ||
+ sig->ldunifa ||
+ sig->ldunifrf ||
+ sig->ldunifarf ||
+ sig->ldtmu ||
+ sig->ldvary ||
+ sig->ldvpm ||
+ sig->ldtlb ||
+ sig->ldtlbu);
+}
+
/**
* This function is responsible for getting VIR results into the associated
* storage for a NIR instruction.
struct qreg result)
{
struct qinst *last_inst = NULL;
- if (!list_empty(&c->cur_block->instructions))
+ if (!list_is_empty(&c->cur_block->instructions))
last_inst = (struct qinst *)c->cur_block->instructions.prev;
assert((result.file == QFILE_TEMP &&
_mesa_hash_table_search(c->def_ht, reg);
struct qreg *qregs = entry->data;
- /* Insert a MOV if the source wasn't an SSA def in the
- * previous instruction.
+ /* If the previous instruction can't be predicated for
+ * the store into the nir_register, then emit a MOV
+ * that can be.
*/
- if ((vir_in_nonuniform_control_flow(c) &&
- c->defs[last_inst->dst.index]->qpu.sig.ldunif)) {
+ if (vir_in_nonuniform_control_flow(c) &&
+ is_ld_signal(&c->defs[last_inst->dst.index]->qpu.sig)) {
result = vir_MOV(c, result);
last_inst = c->defs[result.index];
}
vir_emit_tlb_color_write(c, rt);
}
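+/* Like vir_VPM_WRITE(), but takes the VPM index from a register instead of
+ * an immediate, so it can be computed at run time (v3d 4.x only).
+ */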
+static inline void
+vir_VPM_WRITE_indirect(struct v3d_compile *c,
+ struct qreg val,
+ struct qreg vpm_index)
+{
+ assert(c->devinfo->ver >= 40);
+ vir_STVPMV(c, vpm_index, val);
+}
+
static void
vir_VPM_WRITE(struct v3d_compile *c, struct qreg val, uint32_t vpm_index)
{
if (c->devinfo->ver >= 40) {
- vir_STVPMV(c, vir_uniform_ui(c, vpm_index), val);
+ vir_VPM_WRITE_indirect(c, val, vir_uniform_ui(c, vpm_index));
} else {
/* XXX: v3d33_vir_vpm_write_setup(c); */
vir_MOV_dest(c, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_VPM), val);
vir_VPMWT(c);
}
+static void
+emit_geom_end(struct v3d_compile *c)
+{
+ /* GFXH-1684: VPM writes need to be complete by the end of the shader.
+ */
+ if (c->devinfo->ver >= 40 && c->devinfo->ver <= 42)
+ vir_VPMWT(c);
+}
+
void
v3d_optimize_nir(struct nir_shader *s)
{
const nir_variable *const *a = in_a;
const nir_variable *const *b = in_b;
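+ /* Use the component (location_frac) as a tie-breaker between
+ * variables that share the same driver_location.
+ */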
+ if ((*a)->data.driver_location == (*b)->data.driver_location)
+ return (*a)->data.location_frac - (*b)->data.location_frac;
+
return (*a)->data.driver_location - (*b)->data.driver_location;
}
}
static void
-ntq_setup_vpm_inputs(struct v3d_compile *c)
+ntq_setup_vs_inputs(struct v3d_compile *c)
{
/* Figure out how many components of each vertex attribute the shader
* uses. Each variable should have been split to individual
}
}
-static bool
-var_needs_point_coord(struct v3d_compile *c, nir_variable *var)
-{
- return (var->data.location == VARYING_SLOT_PNTC ||
- (var->data.location >= VARYING_SLOT_VAR0 &&
- (c->fs_key->point_sprite_mask &
- (1 << (var->data.location - VARYING_SLOT_VAR0)))));
-}
-
static bool
program_reads_point_coord(struct v3d_compile *c)
{
nir_foreach_variable(var, &c->s->inputs) {
- if (var_needs_point_coord(c, var))
+ if (util_varying_is_point_coord(var->data.location,
+ c->fs_key->point_sprite_mask)) {
return true;
+ }
}
return false;
}
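+/* Gathers the shader's input variables into a ralloc'ed array sorted by
+ * driver_location (and component within a location), for use by the
+ * per-stage input setup functions below.
+ */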
static void
-ntq_setup_fs_inputs(struct v3d_compile *c)
+get_sorted_input_variables(struct v3d_compile *c,
+ unsigned *num_entries,
+ nir_variable ***vars)
{
- unsigned num_entries = 0;
- unsigned num_components = 0;
- nir_foreach_variable(var, &c->s->inputs) {
- num_entries++;
- num_components += glsl_get_components(var->type);
- }
+ *num_entries = 0;
+ nir_foreach_variable(var, &c->s->inputs)
+ (*num_entries)++;
- nir_variable *vars[num_entries];
+ *vars = ralloc_array(c, nir_variable *, *num_entries);
unsigned i = 0;
nir_foreach_variable(var, &c->s->inputs)
- vars[i++] = var;
+ (*vars)[i++] = var;
/* Sort the variables so that we emit the input setup in
* driver_location order. This is required for VPM reads, whose data
* is fetched into the VPM in driver_location (TGSI register index)
* order.
*/
- qsort(&vars, num_entries, sizeof(*vars), driver_location_compare);
+ qsort(*vars, *num_entries, sizeof(**vars), driver_location_compare);
+}
+
+static void
+ntq_setup_gs_inputs(struct v3d_compile *c)
+{
+ nir_variable **vars;
+ unsigned num_entries;
+ get_sorted_input_variables(c, &num_entries, &vars);
+
+ for (unsigned i = 0; i < num_entries; i++) {
+ nir_variable *var = vars[i];
+
+ /* All GS inputs are arrays with as many entries as vertices
+ * in the input primitive, but here we only care about the
+ * per-vertex input type.
+ */
+ const struct glsl_type *type = glsl_without_array(var->type);
+ unsigned array_len = MAX2(glsl_get_length(type), 1);
+ unsigned loc = var->data.driver_location;
+
+ resize_qreg_array(c, &c->inputs, &c->inputs_array_size,
+ (loc + array_len) * 4);
+
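+ /* Unlike the FS path, we don't emit anything here: we just record
+ * the v3d_varying_slot for each component. The values themselves
+ * are fetched from the VPM when handling load_per_vertex_input.
+ */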
+ for (unsigned j = 0; j < array_len; j++) {
+ unsigned num_elements = glsl_get_vector_elements(type);
+ for (unsigned k = 0; k < num_elements; k++) {
+ unsigned chan = var->data.location_frac + k;
+ unsigned input_idx = c->num_inputs++;
+ struct v3d_varying_slot slot =
+ v3d_slot_from_slot_and_component(var->data.location + j, chan);
+ c->input_slots[input_idx] = slot;
+ }
+ }
+ }
+}
+
+
+static void
+ntq_setup_fs_inputs(struct v3d_compile *c)
+{
+ nir_variable **vars;
+ unsigned num_entries;
+ get_sorted_input_variables(c, &num_entries, &vars);
for (unsigned i = 0; i < num_entries; i++) {
nir_variable *var = vars[i];
if (var->data.location == VARYING_SLOT_POS) {
emit_fragcoord_input(c, loc);
- } else if (var_needs_point_coord(c, var)) {
+ } else if (util_varying_is_point_coord(var->data.location,
+ c->fs_key->point_sprite_mask)) {
c->inputs[loc * 4 + 0] = c->point_x;
c->inputs[loc * 4 + 1] = c->point_y;
} else {
static void
ntq_emit_image_size(struct v3d_compile *c, nir_intrinsic_instr *instr)
{
- assert(instr->intrinsic == nir_intrinsic_image_deref_size);
- nir_variable *var = nir_intrinsic_get_var(instr, 0);
- unsigned image_index = var->data.driver_location;
- const struct glsl_type *sampler_type = glsl_without_array(var->type);
- bool is_array = glsl_sampler_type_is_array(sampler_type);
+ unsigned image_index = nir_src_as_uint(instr->src[0]);
+ bool is_array = nir_intrinsic_image_array(instr);
ntq_store_dest(c, &instr->dest, 0,
vir_uniform(c, QUNIFORM_IMAGE_WIDTH, image_index));
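+ /* For 1D array images the second size component is the layer count
+ * rather than the height.
+ */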
if (instr->num_components > 1) {
ntq_store_dest(c, &instr->dest, 1,
- vir_uniform(c, QUNIFORM_IMAGE_HEIGHT,
+ vir_uniform(c,
+ instr->num_components == 2 && is_array ?
+ QUNIFORM_IMAGE_ARRAY_SIZE :
+ QUNIFORM_IMAGE_HEIGHT,
image_index));
}
if (instr->num_components > 2) {
}
}
+static void
+emit_store_output_gs(struct v3d_compile *c, nir_intrinsic_instr *instr)
+{
+ assert(instr->num_components == 1);
+
+ uint32_t base_offset = nir_intrinsic_base(instr);
+ struct qreg src_offset = ntq_get_src(c, instr->src[1], 0);
+ struct qreg offset =
+ vir_ADD(c, vir_uniform_ui(c, base_offset), src_offset);
+
+ /* Usually, for VS or FS, we only emit outputs once at program end so
+ * our VPM writes are never in non-uniform control flow, but this
+ * is not true for GS, where we are emitting multiple vertices.
+ */
+ if (vir_in_nonuniform_control_flow(c)) {
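+ /* c->execute is zero for the channels that are currently active,
+ * so PUSHZ sets the A flag for them and the VPM write below is
+ * then conditioned on IFA.
+ */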
+ vir_set_pf(vir_MOV_dest(c, vir_nop_reg(), c->execute),
+ V3D_QPU_PF_PUSHZ);
+ }
+
+ struct qreg val = ntq_get_src(c, instr->src[0], 0);
+
+ /* The offset isn't necessarily dynamically uniform for a geometry
+ * shader. This can happen if the shader sometimes doesn't emit one of
+ * the vertices. In that case subsequent vertices will be written to
+ * different offsets in the VPM and we need to use the scatter write
+ * instruction to have a different offset for each lane.
+ */
+ if (nir_src_is_dynamically_uniform(instr->src[1]))
+ vir_VPM_WRITE_indirect(c, val, offset);
+ else
+ vir_STVPMD(c, offset, val);
+
+ if (vir_in_nonuniform_control_flow(c)) {
+ struct qinst *last_inst =
+ (struct qinst *)c->cur_block->instructions.prev;
+ vir_set_cond(last_inst, V3D_QPU_COND_IFA);
+ }
+}
+
+static void
+ntq_emit_store_output(struct v3d_compile *c, nir_intrinsic_instr *instr)
+{
+ /* XXX perf: Use stvpmv with uniform non-constant offsets and
+ * stvpmd with non-uniform offsets and enable
+ * PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR.
+ */
+ if (c->s->info.stage == MESA_SHADER_FRAGMENT) {
+ ntq_emit_color_write(c, instr);
+ } else if (c->s->info.stage == MESA_SHADER_GEOMETRY) {
+ emit_store_output_gs(c, instr);
+ } else {
+ assert(c->s->info.stage == MESA_SHADER_VERTEX);
+ assert(instr->num_components == 1);
+
+ vir_VPM_WRITE(c,
+ ntq_get_src(c, instr->src[0], 0),
+ nir_intrinsic_base(instr));
+ }
+}
+
static void
ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr)
{
ntq_emit_tmu_general(c, instr, true);
break;
- case nir_intrinsic_image_deref_load:
- case nir_intrinsic_image_deref_store:
- case nir_intrinsic_image_deref_atomic_add:
- case nir_intrinsic_image_deref_atomic_imin:
- case nir_intrinsic_image_deref_atomic_umin:
- case nir_intrinsic_image_deref_atomic_imax:
- case nir_intrinsic_image_deref_atomic_umax:
- case nir_intrinsic_image_deref_atomic_and:
- case nir_intrinsic_image_deref_atomic_or:
- case nir_intrinsic_image_deref_atomic_xor:
- case nir_intrinsic_image_deref_atomic_exchange:
- case nir_intrinsic_image_deref_atomic_comp_swap:
+ case nir_intrinsic_image_load:
+ case nir_intrinsic_image_store:
+ case nir_intrinsic_image_atomic_add:
+ case nir_intrinsic_image_atomic_imin:
+ case nir_intrinsic_image_atomic_umin:
+ case nir_intrinsic_image_atomic_imax:
+ case nir_intrinsic_image_atomic_umax:
+ case nir_intrinsic_image_atomic_and:
+ case nir_intrinsic_image_atomic_or:
+ case nir_intrinsic_image_atomic_xor:
+ case nir_intrinsic_image_atomic_exchange:
+ case nir_intrinsic_image_atomic_comp_swap:
v3d40_vir_emit_image_load_store(c, instr);
break;
break;
case nir_intrinsic_load_user_clip_plane:
- for (int i = 0; i < instr->num_components; i++) {
+ for (int i = 0; i < nir_intrinsic_dest_components(instr); i++) {
ntq_store_dest(c, &instr->dest, i,
vir_uniform(c, QUNIFORM_USER_CLIP_PLANE,
nir_intrinsic_ucp_id(instr) *
vir_uniform(c, QUNIFORM_VIEWPORT_Z_OFFSET, 0));
break;
+ case nir_intrinsic_load_alpha_ref_float:
+ ntq_store_dest(c, &instr->dest, 0,
+ vir_uniform(c, QUNIFORM_ALPHA_REF, 0));
+ break;
+
+ case nir_intrinsic_load_line_coord:
+ ntq_store_dest(c, &instr->dest, 0, vir_MOV(c, c->line_x));
+ break;
+
+ case nir_intrinsic_load_line_width:
+ ntq_store_dest(c, &instr->dest, 0,
+ vir_uniform(c, QUNIFORM_LINE_WIDTH, 0));
+ break;
+
+ case nir_intrinsic_load_aa_line_width:
+ ntq_store_dest(c, &instr->dest, 0,
+ vir_uniform(c, QUNIFORM_AA_LINE_WIDTH, 0));
+ break;
+
case nir_intrinsic_load_sample_mask_in:
ntq_store_dest(c, &instr->dest, 0, vir_MSF(c));
break;
break;
case nir_intrinsic_store_output:
- /* XXX perf: Use stvpmv with uniform non-constant offsets and
- * stvpmd with non-uniform offsets and enable
- * PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR.
- */
- if (c->s->info.stage == MESA_SHADER_FRAGMENT) {
- ntq_emit_color_write(c, instr);
- } else {
- assert(instr->num_components == 1);
-
- vir_VPM_WRITE(c,
- ntq_get_src(c, instr->src[0], 0),
- nir_intrinsic_base(instr));
- }
+ ntq_emit_store_output(c, instr);
break;
- case nir_intrinsic_image_deref_size:
+ case nir_intrinsic_image_size:
ntq_emit_image_size(c, instr);
break;
}
case nir_intrinsic_memory_barrier:
- case nir_intrinsic_memory_barrier_atomic_counter:
case nir_intrinsic_memory_barrier_buffer:
case nir_intrinsic_memory_barrier_image:
case nir_intrinsic_memory_barrier_shared:
+ case nir_intrinsic_memory_barrier_tcs_patch:
case nir_intrinsic_group_memory_barrier:
/* We don't do any instruction scheduling of these NIR
* instructions between each other, so we just need to make
*/
break;
- case nir_intrinsic_barrier:
+ case nir_intrinsic_control_barrier:
/* Emit a TSY op to get all invocations in the workgroup
* (actually supergroup) to block until the last invocation
* reaches the TSY op.
ntq_store_dest(c, &instr->dest, 0, vir_EIDX(c));
break;
+ case nir_intrinsic_load_per_vertex_input: {
+ /* col: vertex index, row: varying index (one VPM row per scalar
+ * component, hence base * 4 + component below). */
+ struct qreg col = ntq_get_src(c, instr->src[0], 0);
+ uint32_t row_idx = nir_intrinsic_base(instr) * 4 +
+ nir_intrinsic_component(instr);
+ for (int i = 0; i < instr->num_components; i++) {
+ struct qreg row = vir_uniform_ui(c, row_idx++);
+ ntq_store_dest(c, &instr->dest, i,
+ vir_LDVPMG_IN(c, row, col));
+ }
+ break;
+ }
+
+ case nir_intrinsic_emit_vertex:
+ case nir_intrinsic_end_primitive:
+ unreachable("Should have been lowered in v3d_nir_lower_io");
+ break;
+
+ case nir_intrinsic_load_primitive_id: {
+ /* gl_PrimitiveIDIn is written by the GBG in the first word of the
+ * VPM output header. According to docs, we should read this
+ * using ldvpm(v,d)_in (See Table 71).
+ */
+ ntq_store_dest(c, &instr->dest, 0,
+ vir_LDVPMV_IN(c, vir_uniform_ui(c, 0)));
+ break;
+ }
+
+ case nir_intrinsic_load_invocation_id:
+ ntq_store_dest(c, &instr->dest, 0, vir_IID(c));
+ break;
+
+ case nir_intrinsic_load_fb_layers_v3d:
+ ntq_store_dest(c, &instr->dest, 0,
+ vir_uniform(c, QUNIFORM_FB_LAYERS, 0));
+ break;
+
default:
fprintf(stderr, "Unknown intrinsic: ");
nir_print_instr(&instr->instr, stderr);
ntq_emit_instr(struct v3d_compile *c, nir_instr *instr)
{
switch (instr->type) {
- case nir_instr_type_deref:
- /* ignored, will be walked by the intrinsic using it. */
- break;
-
case nir_instr_type_alu:
ntq_emit_alu(c, nir_instr_as_alu(instr));
break;
c->point_x = emit_fragment_varying(c, NULL, 0, 0);
c->point_y = emit_fragment_varying(c, NULL, 0, 0);
c->uses_implicit_point_line_varyings = true;
- } else if (c->fs_key->is_lines && c->devinfo->ver < 40) {
+ } else if (c->fs_key->is_lines &&
+ (c->devinfo->ver < 40 ||
+ (c->s->info.system_values_read &
+ BITFIELD64_BIT(SYSTEM_VALUE_LINE_COORD)))) {
c->line_x = emit_fragment_varying(c, NULL, 0, 0);
c->uses_implicit_point_line_varyings = true;
}
c->spill_size += V3D_CHANNELS * c->s->scratch_size;
}
- if (c->s->info.stage == MESA_SHADER_FRAGMENT)
+ switch (c->s->info.stage) {
+ case MESA_SHADER_VERTEX:
+ ntq_setup_vs_inputs(c);
+ break;
+ case MESA_SHADER_GEOMETRY:
+ ntq_setup_gs_inputs(c);
+ break;
+ case MESA_SHADER_FRAGMENT:
ntq_setup_fs_inputs(c);
- else
- ntq_setup_vpm_inputs(c);
+ break;
+ case MESA_SHADER_COMPUTE:
+ break;
+ default:
+ unreachable("unsupported shader stage");
+ }
ntq_setup_outputs(c);
case MESA_SHADER_FRAGMENT:
emit_frag_end(c);
break;
+ case MESA_SHADER_GEOMETRY:
+ emit_geom_end(c);
+ break;
case MESA_SHADER_VERTEX:
emit_vert_end(c);
break;