const struct ir3_info *vsi, *fsi;
enum a3xx_instrbuffermode fpbuffer, vpbuffer;
uint32_t fpbuffersz, vpbuffersz, fsoff;
- uint32_t pos_regid, posz_regid, psize_regid, color_regid[4] = {0};
+ uint32_t pos_regid, posz_regid, psize_regid;
+ uint32_t vcoord_regid, face_regid, coord_regid, zwcoord_regid;
+ uint32_t color_regid[4] = {0};
int constmode;
int i, j;
color_regid[3] = ir3_find_output_regid(fp, FRAG_RESULT_DATA3);
}
+ face_regid = ir3_find_sysval_regid(fp, SYSTEM_VALUE_FRONT_FACE);
+ coord_regid = ir3_find_sysval_regid(fp, SYSTEM_VALUE_FRAG_COORD);
+ zwcoord_regid = (coord_regid == regid(63,0)) ? regid(63,0) : (coord_regid + 2);
+ vcoord_regid = ir3_find_sysval_regid(fp, SYSTEM_VALUE_VARYING_COORD);
+
/* adjust regids for alpha output formats. there is no alpha render
* format, so it's just treated like red
*/
A3XX_HLSQ_CONTROL_0_REG_SPCONSTFULLUPDATE);
OUT_RING(ring, A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE(TWO_QUADS) |
A3XX_HLSQ_CONTROL_1_REG_VSSUPERTHREADENABLE |
- COND(fp->frag_coord, A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDXYREGID(regid(0,0)) |
- A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDZWREGID(regid(0,2))));
- OUT_RING(ring, A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD(31));
- OUT_RING(ring, A3XX_HLSQ_CONTROL_3_REG_REGID(fp->pos_regid));
+ A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDXYREGID(coord_regid) |
+ A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDZWREGID(zwcoord_regid));
+ OUT_RING(ring, A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD(31) |
+ A3XX_HLSQ_CONTROL_2_REG_FACENESSREGID(face_regid));
+ OUT_RING(ring, A3XX_HLSQ_CONTROL_3_REG_REGID(vcoord_regid));
OUT_RING(ring, A3XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH(vp->constlen) |
A3XX_HLSQ_VS_CONTROL_REG_CONSTSTARTOFFSET(0) |
A3XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH(vpbuffersz));
struct ir3_instruction *frag_vcoord;
/* for fragment shaders, for gl_FrontFacing and gl_FragCoord: */
- struct ir3_instruction *frag_face, *frag_coord[4];
+ struct ir3_instruction *frag_face, *frag_coord;
/* For vertex shaders, keep track of the system values sources */
struct ir3_instruction *vertex_id, *basevertex, *instance_id;
return instr;
}
-static struct ir3_instruction *
-create_frag_coord(struct ir3_context *ctx, unsigned comp)
-{
- struct ir3_block *block = ctx->block;
- struct ir3_instruction *instr;
-
- compile_assert(ctx, !ctx->frag_coord[comp]);
-
- ctx->frag_coord[comp] = create_input(ctx, 0);
-
- switch (comp) {
- case 0: /* .x */
- case 1: /* .y */
- /* for frag_coord, we get unsigned values.. we need
- * to subtract (integer) 8 and divide by 16 (right-
- * shift by 4) then convert to float:
- *
- * sub.s tmp, src, 8
- * shr.b tmp, tmp, 4
- * mov.u32f32 dst, tmp
- *
- */
- instr = ir3_SUB_S(block, ctx->frag_coord[comp], 0,
- create_immed(block, 8), 0);
- instr = ir3_SHR_B(block, instr, 0,
- create_immed(block, 4), 0);
- instr = ir3_COV(block, instr, TYPE_U32, TYPE_F32);
-
- return instr;
- case 2: /* .z */
- case 3: /* .w */
- default:
- /* seems that we can use these as-is: */
- return ctx->frag_coord[comp];
- }
-}
-
static struct ir3_instruction *
create_driver_param(struct ir3_context *ctx, enum ir3_driver_param dp)
{
if (!ctx->frag_face) {
ctx->so->frag_face = true;
ctx->frag_face = create_input(ctx, 0);
+ add_sysval_input(ctx, SYSTEM_VALUE_FRONT_FACE, ctx->frag_face);
ctx->frag_face->regs[0]->flags |= IR3_REG_HALF;
}
/* for fragface, we get -1 for back and 0 for front. However this is
ir3_END(ctx->block);
}
+static struct ir3_instruction *
+create_frag_coord(struct ir3_context *ctx, unsigned comp)
+{
+ struct ir3_block *block = ctx->block;
+ struct ir3_instruction *instr;
+
+ if (!ctx->frag_coord) {
+ ctx->frag_coord = create_input_compmask(ctx, 0, 0xf);
+ /* defer add_sysval_input() until after all inputs created */
+ }
+
+ split_dest(block, &instr, ctx->frag_coord, comp, 1);
+
+ switch (comp) {
+ case 0: /* .x */
+ case 1: /* .y */
+ /* for frag_coord, we get unsigned values.. we need
+ * to subtract (integer) 8 and divide by 16 (right-
+ * shift by 4) then convert to float:
+ *
+ * sub.s tmp, src, 8
+ * shr.b tmp, tmp, 4
+ * mov.u32f32 dst, tmp
+ *
+ */
+ instr = ir3_SUB_S(block, instr, 0,
+ create_immed(block, 8), 0);
+ instr = ir3_SHR_B(block, instr, 0,
+ create_immed(block, 4), 0);
+ instr = ir3_COV(block, instr, TYPE_U32, TYPE_F32);
+
+ return instr;
+ case 2: /* .z */
+ case 3: /* .w */
+ default:
+ /* seems that we can use these as-is: */
+ return instr;
+ }
+}
+
static void
setup_input(struct ir3_context *ctx, nir_variable *in)
{
}
static const unsigned max_sysvals[SHADER_MAX] = {
- [SHADER_FRAGMENT] = 8,
+ [SHADER_FRAGMENT] = 24, // TODO
[SHADER_VERTEX] = 16,
[SHADER_COMPUTE] = 16, // TODO how many do we actually need?
};
ninputs -= max_sysvals[ctx->so->type];
- /* for fragment shader, we have a single input register (usually
- * r0.xy) which is used as the base for bary.f varying fetch instrs:
+ /* for fragment shader, the vcoord input register is used as the
+ * base for bary.f varying fetch instrs:
*/
+ struct ir3_instruction *vcoord = NULL;
if (ctx->so->type == SHADER_FRAGMENT) {
- // TODO maybe a helper for fi since we need it a few places..
- struct ir3_instruction *instr;
- instr = ir3_instr_create(ctx->block, OPC_META_FI);
- ir3_reg_create(instr, 0, 0);
- ir3_reg_create(instr, 0, IR3_REG_SSA); /* r0.x */
- ir3_reg_create(instr, 0, IR3_REG_SSA); /* r0.y */
- ctx->frag_vcoord = instr;
+ struct ir3_instruction *xy[2];
+
+ vcoord = create_input_compmask(ctx, 0, 0x3);
+ split_dest(ctx->block, xy, vcoord, 0, 2);
+
+ ctx->frag_vcoord = create_collect(ctx, xy, 2);
}
/* Setup inputs: */
setup_input(ctx, var);
}
+ /* Defer add_sysval_input() stuff until after setup_inputs(),
+ * because sysvals need to be appended after varyings:
+ */
+ if (vcoord) {
+ add_sysval_input_compmask(ctx, SYSTEM_VALUE_VARYING_COORD,
+ 0x3, vcoord);
+ }
+
+ if (ctx->frag_coord) {
+ add_sysval_input_compmask(ctx, SYSTEM_VALUE_FRAG_COORD,
+ 0xf, ctx->frag_coord);
+ }
+
/* Setup outputs: */
nir_foreach_variable(var, &ctx->s->outputs) {
setup_output(ctx, var);
emit_function(ctx, fxn);
}
-/* from NIR perspective, we actually have inputs. But most of the "inputs"
- * for a fragment shader are just bary.f instructions. The *actual* inputs
- * from the hw perspective are the frag_vcoord and optionally frag_coord and
- * frag_face.
+/* from NIR perspective, we actually have varying inputs. But the varying
+ * inputs, from an IR standpoint, are just bary.f/ldlv instructions. The
+ * only actual inputs are the sysvals.
*/
static void
fixup_frag_inputs(struct ir3_context *ctx)
{
struct ir3_shader_variant *so = ctx->so;
struct ir3 *ir = ctx->ir;
- struct ir3_instruction **inputs;
- struct ir3_instruction *instr;
- int n, regid = 0;
-
- ir->ninputs = 0;
-
- n = 4; /* always have frag_vcoord */
- n += COND(so->frag_face, 4);
- n += COND(so->frag_coord, 4);
+ unsigned i = 0;
- inputs = ir3_alloc(ctx->ir, n * (sizeof(struct ir3_instruction *)));
+ /* sysvals should appear at the end of the inputs, drop everything else: */
+ while ((i < so->inputs_count) && !so->inputs[i].sysval)
+ i++;
- if (so->frag_face) {
- /* this ultimately gets assigned to hr0.x so doesn't conflict
- * with frag_coord/frag_vcoord..
- */
- inputs[ir->ninputs++] = ctx->frag_face;
- ctx->frag_face->regs[0]->num = 0;
+ /* at IR level, inputs are always blocks of 4 scalars: */
+ i *= 4;
- /* remaining channels not used, but let's avoid confusing
- * other parts that expect inputs to come in groups of vec4
- */
- inputs[ir->ninputs++] = NULL;
- inputs[ir->ninputs++] = NULL;
- inputs[ir->ninputs++] = NULL;
- }
-
- /* since we don't know where to set the regid for frag_coord,
- * we have to use r0.x for it. But we don't want to *always*
- * use r1.x for frag_vcoord as that could increase the register
- * footprint on simple shaders:
- */
- if (so->frag_coord) {
- ctx->frag_coord[0]->regs[0]->num = regid++;
- ctx->frag_coord[1]->regs[0]->num = regid++;
- ctx->frag_coord[2]->regs[0]->num = regid++;
- ctx->frag_coord[3]->regs[0]->num = regid++;
-
- inputs[ir->ninputs++] = ctx->frag_coord[0];
- inputs[ir->ninputs++] = ctx->frag_coord[1];
- inputs[ir->ninputs++] = ctx->frag_coord[2];
- inputs[ir->ninputs++] = ctx->frag_coord[3];
- }
-
- /* we always have frag_vcoord: */
- so->pos_regid = regid;
-
- /* r0.x */
- instr = create_input(ctx, ir->ninputs);
- instr->regs[0]->num = regid++;
- inputs[ir->ninputs++] = instr;
- ctx->frag_vcoord->regs[1]->instr = instr;
-
- /* r0.y */
- instr = create_input(ctx, ir->ninputs);
- instr->regs[0]->num = regid++;
- inputs[ir->ninputs++] = instr;
- ctx->frag_vcoord->regs[2]->instr = instr;
-
- ir->inputs = inputs;
+ ir->inputs = &ir->inputs[i];
+ ir->ninputs -= i;
}
/* Fixup tex sampler state for astc/srgb workaround instructions. We