dst[0] = ir3_DSX(b, src[0], 0);
dst[0]->cat5.type = TYPE_F32;
break;
+ case nir_op_fddx_fine:
+ dst[0] = ir3_DSXPP_1(b, src[0], 0);
+ dst[0]->cat5.type = TYPE_F32;
+ break;
/* Plain and coarse y-derivatives share one lowering: a single DSY
 * instruction on src[0], with the cat5 result type set to F32.
 */
case nir_op_fddy:
case nir_op_fddy_coarse:
dst[0] = ir3_DSY(b, src[0], 0);
dst[0]->cat5.type = TYPE_F32;
break;
+ case nir_op_fddy_fine:
+ dst[0] = ir3_DSYPP_1(b, src[0], 0);
+ dst[0]->cat5.type = TYPE_F32;
+ break;
case nir_op_flt16:
case nir_op_flt32:
dst[0] = ir3_CMPS_F(b, src[0], 0, src[1], 0);
static struct ir3_instruction *
get_image_samp_tex_src(struct ir3_context *ctx, nir_intrinsic_instr *intr)
{
- unsigned slot = ir3_get_image_slot(nir_src_as_deref(intr->src[0]));
+ unsigned slot = nir_src_as_uint(intr->src[0]);
unsigned tex_idx = ir3_image_to_tex(&ctx->so->image_mapping, slot);
struct ir3_instruction *texture, *sampler;
struct ir3_instruction **dst)
{
struct ir3_block *b = ctx->block;
- const nir_variable *var = nir_intrinsic_get_var(intr, 0);
struct ir3_instruction *samp_tex = get_image_samp_tex_src(ctx, intr);
struct ir3_instruction *sam;
struct ir3_instruction * const *src0 = ir3_get_src(ctx, &intr->src[1]);
struct ir3_instruction *coords[4];
- unsigned flags, ncoords = ir3_get_image_coords(var, &flags);
- type_t type = ir3_get_image_type(var);
+ unsigned flags, ncoords = ir3_get_image_coords(intr, &flags);
+ type_t type = ir3_get_type_for_image_intrinsic(intr);
/* hmm, this seems a bit odd, but it is what blob does and (at least
* a5xx) just faults on bogus addresses otherwise:
struct ir3_instruction **dst)
{
struct ir3_block *b = ctx->block;
- const nir_variable *var = nir_intrinsic_get_var(intr, 0);
struct ir3_instruction *samp_tex = get_image_samp_tex_src(ctx, intr);
struct ir3_instruction *sam, *lod;
- unsigned flags, ncoords = ir3_get_image_coords(var, &flags);
+ unsigned flags, ncoords = ir3_get_image_coords(intr, &flags);
type_t dst_type = nir_dest_bit_size(intr->dest) < 32 ?
TYPE_U16 : TYPE_U32;
*
* TODO: This is at least true on a5xx. Check other gens.
*/
- enum glsl_sampler_dim dim =
- glsl_get_sampler_dim(glsl_without_array(var->type));
- if (dim == GLSL_SAMPLER_DIM_BUF) {
+ if (nir_intrinsic_image_dim(intr) == GLSL_SAMPLER_DIM_BUF) {
/* Since all the possible values the divisor can take are
* power-of-two (4, 8, or 16), the division is implemented
* as a shift-right.
*/
struct ir3_const_state *const_state = &ctx->so->shader->const_state;
unsigned cb = regid(const_state->offsets.image_dims, 0) +
- const_state->image_dims.off[var->data.driver_location];
+ const_state->image_dims.off[nir_src_as_uint(intr->src[0])];
struct ir3_instruction *aux = create_uniform(b, cb + 1);
tmp[0] = ir3_SHR_B(b, tmp[0], 0, aux, 0);
struct ir3_instruction *barrier;
switch (intr->intrinsic) {
- case nir_intrinsic_barrier:
+ case nir_intrinsic_control_barrier:
barrier = ir3_BAR(b);
barrier->cat7.g = true;
barrier->cat7.l = true;
IR3_BARRIER_IMAGE_R | IR3_BARRIER_IMAGE_W |
IR3_BARRIER_BUFFER_R | IR3_BARRIER_BUFFER_W;
break;
- case nir_intrinsic_memory_barrier_atomic_counter:
case nir_intrinsic_memory_barrier_buffer:
barrier = ir3_FENCE(b);
barrier->cat7.g = true;
struct ir3_instruction *xy[2];
struct ir3_instruction *ij;
- ij = create_sysval_input(ctx, SYSTEM_VALUE_BARYCENTRIC_CENTROID, 0x3);
+ ij = create_sysval_input(ctx, SYSTEM_VALUE_BARYCENTRIC_PERSP_CENTROID, 0x3);
ir3_split_dest(ctx->block, xy, ij, 0, 2);
ctx->ij_centroid = ir3_create_collect(ctx, xy, 2);
struct ir3_instruction *xy[2];
struct ir3_instruction *ij;
- ij = create_sysval_input(ctx, SYSTEM_VALUE_BARYCENTRIC_SAMPLE, 0x3);
+ ij = create_sysval_input(ctx, SYSTEM_VALUE_BARYCENTRIC_PERSP_SAMPLE, 0x3);
ir3_split_dest(ctx->block, xy, ij, 0, 2);
ctx->ij_sample = ir3_create_collect(ctx, xy, 2);
get_frag_coord(struct ir3_context *ctx)
{
if (!ctx->frag_coord) {
- struct ir3_block *b = ctx->block;
+ struct ir3_block *b = ctx->in_block;
struct ir3_instruction *xyzw[4];
struct ir3_instruction *hw_frag_coord;
hw_frag_coord = create_sysval_input(ctx, SYSTEM_VALUE_FRAG_COORD, 0xf);
- ir3_split_dest(ctx->block, xyzw, hw_frag_coord, 0, 4);
+ ir3_split_dest(b, xyzw, hw_frag_coord, 0, 4);
/* for frag_coord.xy, we get unsigned values.. we need
* to subtract (integer) 8 and divide by 16 (right-
case nir_intrinsic_end_patch_ir3:
assert(ctx->so->type == MESA_SHADER_TESS_CTRL);
- struct ir3_instruction *end = ir3_ENDPATCH(b);
+ struct ir3_instruction *end = ir3_ENDIF(b);
array_insert(b, b->keeps, end);
end->barrier_class = IR3_BARRIER_EVERYTHING;
case nir_intrinsic_load_size_ir3:
if (!ctx->ij_size) {
ctx->ij_size =
- create_sysval_input(ctx, SYSTEM_VALUE_BARYCENTRIC_SIZE, 0x1);
+ create_sysval_input(ctx, SYSTEM_VALUE_BARYCENTRIC_PERSP_SIZE, 0x1);
}
dst[0] = ctx->ij_size;
break;
* that is easier than mapping things back to a
* nir_variable to figure out what it is.
*/
- dst[i] = ctx->ir->inputs[inloc];
+ dst[i] = ctx->inputs[inloc];
+ compile_assert(ctx, dst[i]);
}
}
} else {
case nir_intrinsic_shared_atomic_comp_swap:
dst[0] = emit_intrinsic_atomic_shared(ctx, intr);
break;
- case nir_intrinsic_image_deref_load:
+ case nir_intrinsic_image_load:
emit_intrinsic_load_image(ctx, intr, dst);
break;
- case nir_intrinsic_image_deref_store:
+ case nir_intrinsic_image_store:
if ((ctx->so->type == MESA_SHADER_FRAGMENT) &&
!ctx->s->info.fs.early_fragment_tests)
ctx->so->no_earlyz = true;
ctx->funcs->emit_intrinsic_store_image(ctx, intr);
break;
- case nir_intrinsic_image_deref_size:
+ case nir_intrinsic_image_size:
emit_intrinsic_image_size(ctx, intr, dst);
break;
- case nir_intrinsic_image_deref_atomic_add:
- case nir_intrinsic_image_deref_atomic_imin:
- case nir_intrinsic_image_deref_atomic_umin:
- case nir_intrinsic_image_deref_atomic_imax:
- case nir_intrinsic_image_deref_atomic_umax:
- case nir_intrinsic_image_deref_atomic_and:
- case nir_intrinsic_image_deref_atomic_or:
- case nir_intrinsic_image_deref_atomic_xor:
- case nir_intrinsic_image_deref_atomic_exchange:
- case nir_intrinsic_image_deref_atomic_comp_swap:
+ case nir_intrinsic_image_atomic_add:
+ case nir_intrinsic_image_atomic_imin:
+ case nir_intrinsic_image_atomic_umin:
+ case nir_intrinsic_image_atomic_imax:
+ case nir_intrinsic_image_atomic_umax:
+ case nir_intrinsic_image_atomic_and:
+ case nir_intrinsic_image_atomic_or:
+ case nir_intrinsic_image_atomic_xor:
+ case nir_intrinsic_image_atomic_exchange:
+ case nir_intrinsic_image_atomic_comp_swap:
if ((ctx->so->type == MESA_SHADER_FRAGMENT) &&
!ctx->s->info.fs.early_fragment_tests)
ctx->so->no_earlyz = true;
dst[0] = ctx->funcs->emit_intrinsic_atomic_image(ctx, intr);
break;
- case nir_intrinsic_barrier:
+ case nir_intrinsic_control_barrier:
case nir_intrinsic_memory_barrier:
case nir_intrinsic_group_memory_barrier:
- case nir_intrinsic_memory_barrier_atomic_counter:
case nir_intrinsic_memory_barrier_buffer:
case nir_intrinsic_memory_barrier_image:
case nir_intrinsic_memory_barrier_shared:
}
dst[0] = ctx->basevertex;
break;
+ case nir_intrinsic_load_base_instance:
+ if (!ctx->base_instance) {
+ ctx->base_instance = create_driver_param(ctx, IR3_DP_INSTID_BASE);
+ }
+ dst[0] = ctx->base_instance;
+ break;
case nir_intrinsic_load_vertex_id_zero_base:
case nir_intrinsic_load_vertex_id:
if (!ctx->vertex_id) {
cond->regs[0]->flags &= ~IR3_REG_SSA;
kill = ir3_KILL(b, cond, 0);
+ kill->regs[1]->num = regid(REG_P0, 0);
array_insert(ctx->ir, ctx->ir->predicates, kill);
array_insert(b, b->keeps, kill);
/* condition always goes in predicate register: */
cond->regs[0]->num = regid(REG_P0, 0);
- kill = ir3_CONDEND(b, cond, 0);
+ kill = ir3_IF(b, cond, 0);
kill->barrier_class = IR3_BARRIER_EVERYTHING;
kill->barrier_conflict = IR3_BARRIER_EVERYTHING;
static void
tex_info(nir_tex_instr *tex, unsigned *flagsp, unsigned *coordsp)
{
- unsigned coords, flags = 0;
+ unsigned coords = glsl_get_sampler_dim_coordinate_components(tex->sampler_dim);
+ unsigned flags = 0;
/* note: would use tex->coord_components.. except txs.. also,
* since array index goes after shadow ref, we don't want to
* count it:
*/
- switch (tex->sampler_dim) {
- case GLSL_SAMPLER_DIM_1D:
- case GLSL_SAMPLER_DIM_BUF:
- coords = 1;
- break;
- case GLSL_SAMPLER_DIM_2D:
- case GLSL_SAMPLER_DIM_RECT:
- case GLSL_SAMPLER_DIM_EXTERNAL:
- case GLSL_SAMPLER_DIM_MS:
- case GLSL_SAMPLER_DIM_SUBPASS:
- case GLSL_SAMPLER_DIM_SUBPASS_MS:
- coords = 2;
- break;
- case GLSL_SAMPLER_DIM_3D:
- case GLSL_SAMPLER_DIM_CUBE:
- coords = 3;
+ if (coords == 3)
flags |= IR3_INSTR_3D;
- break;
- default:
- unreachable("bad sampler_dim");
- }
if (tex->is_shadow && tex->op != nir_texop_lod)
flags |= IR3_INSTR_S;
return;
so->inputs[n].slot = slot;
- so->inputs[n].compmask = (1 << (ncomp + frac)) - 1;
+ so->inputs[n].compmask |= (1 << (ncomp + frac)) - 1;
so->inputs_count = MAX2(so->inputs_count, n + 1);
so->inputs[n].interpolate = in->data.interpolation;
ctx->inputs[idx] = instr;
}
} else if (ctx->so->type == MESA_SHADER_VERTEX) {
- /* We shouldn't have fractional input for VS input.. that only shows
- * up with varying packing
- */
- assert(frac == 0);
+ struct ir3_instruction *input = NULL, *in;
+ struct ir3_instruction *components[4];
+ unsigned mask = (1 << (ncomp + frac)) - 1;
- struct ir3_instruction *input = create_input(ctx, (1 << ncomp) - 1);
- struct ir3_instruction *components[ncomp];
+ foreach_input(in, ctx->ir) {
+ if (in->input.inidx == n) {
+ input = in;
+ break;
+ }
+ }
- input->input.inidx = n;
+ if (!input) {
+ input = create_input(ctx, mask);
+ input->input.inidx = n;
+ } else {
+ input->regs[0]->wrmask |= mask;
+ }
- ir3_split_dest(ctx->block, components, input, 0, ncomp);
+ ir3_split_dest(ctx->block, components, input, frac, ncomp);
for (int i = 0; i < ncomp; i++) {
unsigned idx = (n * 4) + i + frac;
* because sysvals need to be appended after varyings:
*/
if (vcoord) {
- add_sysval_input_compmask(ctx, SYSTEM_VALUE_BARYCENTRIC_PIXEL,
+ add_sysval_input_compmask(ctx, SYSTEM_VALUE_BARYCENTRIC_PERSP_PIXEL,
0x3, vcoord);
}
if (so->binning_pass && (ctx->compiler->gpu_id < 600))
fixup_binning_pass(ctx);
+ ir3_debug_print(ir, "BEFORE CF");
+
+ ir3_cf(ir);
+
ir3_debug_print(ir, "BEFORE CP");
ir3_cp(ir, so);
goto out;
}
- if (compiler->gpu_id >= 600) {
- ir3_a6xx_fixup_atomic_dests(ir, so);
- }
-
ir3_debug_print(ir, "AFTER SCHED");
/* Pre-assign VS inputs on a6xx+ binning pass shader, to align
int idx = 0;
foreach_input(instr, ir) {
- if (instr->input.sysval != SYSTEM_VALUE_BARYCENTRIC_PIXEL)
+ if (instr->input.sysval != SYSTEM_VALUE_BARYCENTRIC_PERSP_PIXEL)
continue;
assert(idx < ARRAY_SIZE(precolor));
goto out;
}
- ir3_debug_print(ir, "AFTER RA");
+ ir3_postsched(ctx);
+ ir3_debug_print(ir, "AFTER POSTSCHED");
+
+ if (compiler->gpu_id >= 600) {
+ if (ir3_a6xx_fixup_atomic_dests(ir, so)) {
+ ir3_debug_print(ir, "AFTER ATOMIC FIXUP");
+ }
+ }
if (so->type == MESA_SHADER_FRAGMENT)
pack_inlocs(ctx);
assert(in->opc == OPC_META_INPUT);
unsigned inidx = in->input.inidx;
- if (pre_assign_inputs) {
+ if (pre_assign_inputs && !so->inputs[inidx].sysval) {
if (VALIDREG(so->nonbinning->inputs[inidx].regid)) {
compile_assert(ctx, in->regs[0]->num ==
so->nonbinning->inputs[inidx].regid);
/* We need to do legalize after (for frag shader's) the "bary.f"
* offsets (inloc) have been assigned.
*/
- ir3_legalize(ir, &so->has_ssbo, &so->need_pixlod, &max_bary);
+ ir3_legalize(ir, so, &max_bary);
ir3_debug_print(ir, "AFTER LEGALIZE");