struct ir3_instruction *frag_vcoord;
/* for fragment shaders, for gl_FrontFacing and gl_FragCoord: */
- struct ir3_instruction *frag_face, *frag_coord[4];
+ struct ir3_instruction *frag_face, *frag_coord;
/* For vertex shaders, keep track of the system values sources */
struct ir3_instruction *vertex_id, *basevertex, *instance_id;
}
static struct ir3_instruction *
-create_input_compmask(struct ir3_block *block, unsigned n, unsigned compmask)
+create_input_compmask(struct ir3_context *ctx, unsigned n, unsigned compmask)
{
struct ir3_instruction *in;
- in = ir3_instr_create(block, OPC_META_INPUT);
- in->inout.block = block;
+ in = ir3_instr_create(ctx->in_block, OPC_META_INPUT);
+ in->inout.block = ctx->in_block;
ir3_reg_create(in, n, 0);
in->regs[0]->wrmask = compmask;
}
static struct ir3_instruction *
-create_input(struct ir3_block *block, unsigned n)
+create_input(struct ir3_context *ctx, unsigned n)
{
- return create_input_compmask(block, n, 0x1);
+ return create_input_compmask(ctx, n, 0x1);
}
static struct ir3_instruction *
return instr;
}
-static struct ir3_instruction *
-create_frag_coord(struct ir3_context *ctx, unsigned comp)
-{
- struct ir3_block *block = ctx->block;
- struct ir3_instruction *instr;
-
- compile_assert(ctx, !ctx->frag_coord[comp]);
-
- ctx->frag_coord[comp] = create_input(ctx->block, 0);
-
- switch (comp) {
- case 0: /* .x */
- case 1: /* .y */
- /* for frag_coord, we get unsigned values.. we need
- * to subtract (integer) 8 and divide by 16 (right-
- * shift by 4) then convert to float:
- *
- * sub.s tmp, src, 8
- * shr.b tmp, tmp, 4
- * mov.u32f32 dst, tmp
- *
- */
- instr = ir3_SUB_S(block, ctx->frag_coord[comp], 0,
- create_immed(block, 8), 0);
- instr = ir3_SHR_B(block, instr, 0,
- create_immed(block, 4), 0);
- instr = ir3_COV(block, instr, TYPE_U32, TYPE_F32);
-
- return instr;
- case 2: /* .z */
- case 3: /* .w */
- default:
- /* seems that we can use these as-is: */
- return ctx->frag_coord[comp];
- }
-}
-
static struct ir3_instruction *
create_driver_param(struct ir3_context *ctx, enum ir3_driver_param dp)
{
if (!ctx->vertex_id) {
gl_system_value sv = (intr->intrinsic == nir_intrinsic_load_vertex_id) ?
SYSTEM_VALUE_VERTEX_ID : SYSTEM_VALUE_VERTEX_ID_ZERO_BASE;
- ctx->vertex_id = create_input(b, 0);
+ ctx->vertex_id = create_input(ctx, 0);
add_sysval_input(ctx, sv, ctx->vertex_id);
}
dst[0] = ctx->vertex_id;
break;
case nir_intrinsic_load_instance_id:
if (!ctx->instance_id) {
- ctx->instance_id = create_input(b, 0);
+ ctx->instance_id = create_input(ctx, 0);
add_sysval_input(ctx, SYSTEM_VALUE_INSTANCE_ID,
ctx->instance_id);
}
case nir_intrinsic_load_sample_id:
case nir_intrinsic_load_sample_id_no_per_sample:
if (!ctx->samp_id) {
- ctx->samp_id = create_input(b, 0);
+ ctx->samp_id = create_input(ctx, 0);
ctx->samp_id->regs[0]->flags |= IR3_REG_HALF;
add_sysval_input(ctx, SYSTEM_VALUE_SAMPLE_ID,
ctx->samp_id);
break;
case nir_intrinsic_load_sample_mask_in:
if (!ctx->samp_mask_in) {
- ctx->samp_mask_in = create_input(b, 0);
+ ctx->samp_mask_in = create_input(ctx, 0);
add_sysval_input(ctx, SYSTEM_VALUE_SAMPLE_MASK_IN,
ctx->samp_mask_in);
}
case nir_intrinsic_load_front_face:
if (!ctx->frag_face) {
ctx->so->frag_face = true;
- ctx->frag_face = create_input(b, 0);
+ ctx->frag_face = create_input(ctx, 0);
+ add_sysval_input(ctx, SYSTEM_VALUE_FRONT_FACE, ctx->frag_face);
ctx->frag_face->regs[0]->flags |= IR3_REG_HALF;
}
/* for fragface, we get -1 for back and 0 for front. However this is
break;
case nir_intrinsic_load_local_invocation_id:
if (!ctx->local_invocation_id) {
- ctx->local_invocation_id = create_input_compmask(b, 0, 0x7);
+ ctx->local_invocation_id = create_input_compmask(ctx, 0, 0x7);
add_sysval_input_compmask(ctx, SYSTEM_VALUE_LOCAL_INVOCATION_ID,
0x7, ctx->local_invocation_id);
}
break;
case nir_intrinsic_load_work_group_id:
if (!ctx->work_group_id) {
- ctx->work_group_id = create_input_compmask(b, 0, 0x7);
+ ctx->work_group_id = create_input_compmask(ctx, 0, 0x7);
add_sysval_input_compmask(ctx, SYSTEM_VALUE_WORK_GROUP_ID,
0x7, ctx->work_group_id);
ctx->work_group_id->regs[0]->flags |= IR3_REG_HIGH;
* so that it is seen as live over the entire duration
* of the shader:
*/
- vtxcnt = create_input(ctx->in_block, 0);
+ vtxcnt = create_input(ctx, 0);
add_sysval_input(ctx, SYSTEM_VALUE_VERTEX_CNT, vtxcnt);
maxvtxcnt = create_driver_param(ctx, IR3_DP_VTXCNT_MAX);
ir3_END(ctx->block);
}
+static struct ir3_instruction *
+create_frag_coord(struct ir3_context *ctx, unsigned comp)
+{
+ struct ir3_block *block = ctx->block;
+ struct ir3_instruction *instr;
+
+ if (!ctx->frag_coord) {
+ ctx->frag_coord = create_input_compmask(ctx, 0, 0xf);
+ /* defer add_sysval_input() until after all inputs created */
+ }
+
+ split_dest(block, &instr, ctx->frag_coord, comp, 1);
+
+ switch (comp) {
+ case 0: /* .x */
+ case 1: /* .y */
+ /* for frag_coord, we get unsigned values.. we need
+ * to subtract (integer) 8 and divide by 16 (right-
+ * shift by 4) then convert to float:
+ *
+ * sub.s tmp, src, 8
+ * shr.b tmp, tmp, 4
+ * mov.u32f32 dst, tmp
+ *
+ */
+ instr = ir3_SUB_S(block, instr, 0,
+ create_immed(block, 8), 0);
+ instr = ir3_SHR_B(block, instr, 0,
+ create_immed(block, 4), 0);
+ instr = ir3_COV(block, instr, TYPE_U32, TYPE_F32);
+
+ return instr;
+ case 2: /* .z */
+ case 3: /* .w */
+ default:
+ /* seems that we can use these as-is: */
+ return instr;
+ }
+}
+
static void
setup_input(struct ir3_context *ctx, nir_variable *in)
{
for (int i = 0; i < ncomp; i++) {
unsigned idx = (n * 4) + i;
compile_assert(ctx, idx < ctx->ir->ninputs);
- ctx->ir->inputs[idx] = create_input(ctx->block, idx);
+ ctx->ir->inputs[idx] = create_input(ctx, idx);
}
} else {
compile_error(ctx, "unknown shader type: %d\n", ctx->so->type);
}
static const unsigned max_sysvals[SHADER_MAX] = {
- [SHADER_FRAGMENT] = 8,
+ [SHADER_FRAGMENT] = 24, // TODO
[SHADER_VERTEX] = 16,
[SHADER_COMPUTE] = 16, // TODO how many do we actually need?
};
ninputs -= max_sysvals[ctx->so->type];
- /* for fragment shader, we have a single input register (usually
- * r0.xy) which is used as the base for bary.f varying fetch instrs:
+ /* for fragment shader, the vcoord input register is used as the
+ * base for bary.f varying fetch instrs:
*/
+ struct ir3_instruction *vcoord = NULL;
if (ctx->so->type == SHADER_FRAGMENT) {
- // TODO maybe a helper for fi since we need it a few places..
- struct ir3_instruction *instr;
- instr = ir3_instr_create(ctx->block, OPC_META_FI);
- ir3_reg_create(instr, 0, 0);
- ir3_reg_create(instr, 0, IR3_REG_SSA); /* r0.x */
- ir3_reg_create(instr, 0, IR3_REG_SSA); /* r0.y */
- ctx->frag_vcoord = instr;
+ struct ir3_instruction *xy[2];
+
+ vcoord = create_input_compmask(ctx, 0, 0x3);
+ split_dest(ctx->block, xy, vcoord, 0, 2);
+
+ ctx->frag_vcoord = create_collect(ctx, xy, 2);
}
/* Setup inputs: */
setup_input(ctx, var);
}
+ /* Defer add_sysval_input() stuff until after setup_inputs(),
+ * because sysvals need to be appended after varyings:
+ */
+ if (vcoord) {
+ add_sysval_input_compmask(ctx, SYSTEM_VALUE_VARYING_COORD,
+ 0x3, vcoord);
+ }
+
+ if (ctx->frag_coord) {
+ add_sysval_input_compmask(ctx, SYSTEM_VALUE_FRAG_COORD,
+ 0xf, ctx->frag_coord);
+ }
+
/* Setup outputs: */
nir_foreach_variable(var, &ctx->s->outputs) {
setup_output(ctx, var);
emit_function(ctx, fxn);
}
-/* from NIR perspective, we actually have inputs. But most of the "inputs"
- * for a fragment shader are just bary.f instructions. The *actual* inputs
- * from the hw perspective are the frag_vcoord and optionally frag_coord and
- * frag_face.
+/* from NIR perspective, we actually have varying inputs. But the varying
+ * inputs, from an IR standpoint, are just bary.f/ldlv instructions. The
+ * only actual inputs are the sysvals.
*/
static void
fixup_frag_inputs(struct ir3_context *ctx)
{
struct ir3_shader_variant *so = ctx->so;
struct ir3 *ir = ctx->ir;
- struct ir3_instruction **inputs;
- struct ir3_instruction *instr;
- int n, regid = 0;
-
- ir->ninputs = 0;
-
- n = 4; /* always have frag_vcoord */
- n += COND(so->frag_face, 4);
- n += COND(so->frag_coord, 4);
-
- inputs = ir3_alloc(ctx->ir, n * (sizeof(struct ir3_instruction *)));
-
- if (so->frag_face) {
- /* this ultimately gets assigned to hr0.x so doesn't conflict
- * with frag_coord/frag_vcoord..
- */
- inputs[ir->ninputs++] = ctx->frag_face;
- ctx->frag_face->regs[0]->num = 0;
-
- /* remaining channels not used, but let's avoid confusing
- * other parts that expect inputs to come in groups of vec4
- */
- inputs[ir->ninputs++] = NULL;
- inputs[ir->ninputs++] = NULL;
- inputs[ir->ninputs++] = NULL;
- }
-
- /* since we don't know where to set the regid for frag_coord,
- * we have to use r0.x for it. But we don't want to *always*
- * use r1.x for frag_vcoord as that could increase the register
- * footprint on simple shaders:
- */
- if (so->frag_coord) {
- ctx->frag_coord[0]->regs[0]->num = regid++;
- ctx->frag_coord[1]->regs[0]->num = regid++;
- ctx->frag_coord[2]->regs[0]->num = regid++;
- ctx->frag_coord[3]->regs[0]->num = regid++;
-
- inputs[ir->ninputs++] = ctx->frag_coord[0];
- inputs[ir->ninputs++] = ctx->frag_coord[1];
- inputs[ir->ninputs++] = ctx->frag_coord[2];
- inputs[ir->ninputs++] = ctx->frag_coord[3];
- }
-
- /* we always have frag_vcoord: */
- so->pos_regid = regid;
+ unsigned i = 0;
- /* r0.x */
- instr = create_input(ctx->in_block, ir->ninputs);
- instr->regs[0]->num = regid++;
- inputs[ir->ninputs++] = instr;
- ctx->frag_vcoord->regs[1]->instr = instr;
+ /* sysvals should appear at the end of the inputs, drop everything else: */
+ while ((i < so->inputs_count) && !so->inputs[i].sysval)
+ i++;
- /* r0.y */
- instr = create_input(ctx->in_block, ir->ninputs);
- instr->regs[0]->num = regid++;
- inputs[ir->ninputs++] = instr;
- ctx->frag_vcoord->regs[2]->instr = instr;
+ /* at IR level, inputs are always blocks of 4 scalars: */
+ i *= 4;
- ir->inputs = inputs;
+ ir->inputs = &ir->inputs[i];
+ ir->ninputs -= i;
}
/* Fixup tex sampler state for astc/srgb workaround instructions. We
actual_in = 0;
inloc = 0;
for (i = 0; i < so->inputs_count; i++) {
- unsigned j, regid = ~0, compmask = 0, maxcomp = 0;
+ unsigned j, reg = regid(63,0), compmask = 0, maxcomp = 0;
so->inputs[i].ncomp = 0;
so->inputs[i].inloc = inloc;
for (j = 0; j < 4; j++) {
struct ir3_instruction *in = inputs[(i*4) + j];
if (in && !(in->flags & IR3_INSTR_UNUSED)) {
compmask |= (1 << j);
- regid = in->regs[0]->num - j;
+ reg = in->regs[0]->num - j;
actual_in++;
so->inputs[i].ncomp++;
if ((so->type == SHADER_FRAGMENT) && so->inputs[i].bary) {
} else if (!so->inputs[i].sysval) {
so->inputs[i].compmask = compmask;
}
- so->inputs[i].regid = regid;
+ so->inputs[i].regid = reg;
}
if (ctx->astc_srgb)