freedreno/lowering: two-sided-color
authorRob Clark <robclark@freedesktop.org>
Sun, 23 Feb 2014 19:40:41 +0000 (14:40 -0500)
committerRob Clark <robclark@freedesktop.org>
Sun, 2 Mar 2014 16:26:35 +0000 (11:26 -0500)
Add option to generate fragment shader to emulate two sided color.
Additional inputs are added to shader for BCOLOR's (one corresponding to
each COLOR input).  CMP instructions are used to select whether to use
COLOR or BCOLOR.

Signed-off-by: Rob Clark <robclark@freedesktop.org>
src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
src/gallium/drivers/freedreno/a3xx/fd3_compiler_old.c
src/gallium/drivers/freedreno/a3xx/fd3_program.c
src/gallium/drivers/freedreno/freedreno_lowering.c
src/gallium/drivers/freedreno/freedreno_lowering.h

index ae88d6ef54d99889a86838b294dfe82e14c02543..7450fac23c1843ed92eb7b906cc1e0edb0222f72 100644 (file)
@@ -152,6 +152,7 @@ compile_init(struct fd3_compile_context *ctx, struct fd3_shader_variant *so,
        unsigned ret;
        struct tgsi_shader_info *info = &ctx->info;
        const struct fd_lowering_config lconfig = {
+                       .color_two_side = so->key.color_two_side,
                        .lower_DST  = true,
                        .lower_XPD  = true,
                        .lower_SCS  = true,
@@ -2003,6 +2004,7 @@ decl_out(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl)
                        so->writes_psize = true;
                        break;
                case TGSI_SEMANTIC_COLOR:
+               case TGSI_SEMANTIC_BCOLOR:
                case TGSI_SEMANTIC_GENERIC:
                case TGSI_SEMANTIC_FOG:
                case TGSI_SEMANTIC_TEXCOORD:
@@ -2059,24 +2061,31 @@ fixup_frag_inputs(struct fd3_compile_context *ctx)
 {
        struct fd3_shader_variant *so = ctx->so;
        struct ir3_block *block = ctx->block;
+       struct ir3_instruction **inputs;
        struct ir3_instruction *instr;
-       int regid = 0;
+       int n, regid = 0;
 
        block->ninputs = 0;
 
+       n  = 4;  /* always have frag_pos */
+       n += COND(so->frag_face, 4);
+       n += COND(so->frag_coord, 4);
+
+       inputs = ir3_alloc(ctx->ir, n * (sizeof(struct ir3_instruction *)));
+
        if (so->frag_face) {
                /* this ultimately gets assigned to hr0.x so doesn't conflict
                 * with frag_coord/frag_pos..
                 */
-               block->inputs[block->ninputs++] = ctx->frag_face;
+               inputs[block->ninputs++] = ctx->frag_face;
                ctx->frag_face->regs[0]->num = 0;
 
                /* remaining channels not used, but let's avoid confusing
                 * other parts that expect inputs to come in groups of vec4
                 */
-               block->inputs[block->ninputs++] = NULL;
-               block->inputs[block->ninputs++] = NULL;
-               block->inputs[block->ninputs++] = NULL;
+               inputs[block->ninputs++] = NULL;
+               inputs[block->ninputs++] = NULL;
+               inputs[block->ninputs++] = NULL;
        }
 
        /* since we don't know where to set the regid for frag_coord,
@@ -2090,10 +2099,10 @@ fixup_frag_inputs(struct fd3_compile_context *ctx)
                ctx->frag_coord[2]->regs[0]->num = regid++;
                ctx->frag_coord[3]->regs[0]->num = regid++;
 
-               block->inputs[block->ninputs++] = ctx->frag_coord[0];
-               block->inputs[block->ninputs++] = ctx->frag_coord[1];
-               block->inputs[block->ninputs++] = ctx->frag_coord[2];
-               block->inputs[block->ninputs++] = ctx->frag_coord[3];
+               inputs[block->ninputs++] = ctx->frag_coord[0];
+               inputs[block->ninputs++] = ctx->frag_coord[1];
+               inputs[block->ninputs++] = ctx->frag_coord[2];
+               inputs[block->ninputs++] = ctx->frag_coord[3];
        }
 
        /* we always have frag_pos: */
@@ -2102,14 +2111,16 @@ fixup_frag_inputs(struct fd3_compile_context *ctx)
        /* r0.x */
        instr = create_input(block, NULL, block->ninputs);
        instr->regs[0]->num = regid++;
-       block->inputs[block->ninputs++] = instr;
+       inputs[block->ninputs++] = instr;
        ctx->frag_pos->regs[1]->instr = instr;
 
        /* r0.y */
        instr = create_input(block, NULL, block->ninputs);
        instr->regs[0]->num = regid++;
-       block->inputs[block->ninputs++] = instr;
+       inputs[block->ninputs++] = instr;
        ctx->frag_pos->regs[2]->instr = instr;
+
+       block->inputs = inputs;
 }
 
 static void
@@ -2189,10 +2200,6 @@ compile_instructions(struct fd3_compile_context *ctx)
                        break;
                }
        }
-
-       /* fixup actual inputs for frag shader: */
-       if (ctx->type == TGSI_PROCESSOR_FRAGMENT)
-               fixup_frag_inputs(ctx);
 }
 
 static void
@@ -2217,6 +2224,7 @@ fd3_compile_shader(struct fd3_shader_variant *so,
 {
        struct fd3_compile_context ctx;
        struct ir3_block *block;
+       struct ir3_instruction **inputs;
        unsigned i, j, actual_in;
        int ret = 0;
 
@@ -2235,6 +2243,13 @@ fd3_compile_shader(struct fd3_shader_variant *so,
 
        block = ctx.block;
 
+       /* keep track of the inputs from TGSI perspective.. */
+       inputs = block->inputs;
+
+       /* but fixup actual inputs for frag shader: */
+       if (ctx.type == TGSI_PROCESSOR_FRAGMENT)
+               fixup_frag_inputs(&ctx);
+
        /* at this point, for binning pass, throw away unneeded outputs: */
        if (key.binning_pass) {
                for (i = 0, j = 0; i < so->outputs_count; i++) {
@@ -2320,7 +2335,7 @@ fd3_compile_shader(struct fd3_shader_variant *so,
        for (i = 0; i < so->inputs_count; i++) {
                unsigned j, regid = ~0, compmask = 0;
                for (j = 0; j < 4; j++) {
-                       struct ir3_instruction *in = block->inputs[(i*4) + j];
+                       struct ir3_instruction *in = inputs[(i*4) + j];
                        if (in) {
                                compmask |= (1 << j);
                                regid = in->regs[0]->num - j;
index 9a0bbb5edff8d8180c7c4edb4d6549d206f83237..76de287b16386e435173f02ae1b683e51b81cfbd 100644 (file)
@@ -126,6 +126,7 @@ compile_init(struct fd3_compile_context *ctx, struct fd3_shader_variant *so,
        unsigned ret, base = 0;
        struct tgsi_shader_info *info = &ctx->info;
        const struct fd_lowering_config lconfig = {
+                       .color_two_side = so->key.color_two_side,
                        .lower_DST  = true,
                        .lower_XPD  = true,
                        .lower_SCS  = true,
@@ -1383,6 +1384,7 @@ decl_out(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl)
                        so->writes_psize = true;
                        break;
                case TGSI_SEMANTIC_COLOR:
+               case TGSI_SEMANTIC_BCOLOR:
                case TGSI_SEMANTIC_GENERIC:
                case TGSI_SEMANTIC_FOG:
                case TGSI_SEMANTIC_TEXCOORD:
index 4cdd9387f9d5695d75c85c4dc0d8ed1f3f7511c5..a84351ae88734480bac43e5442ddb98bdbacf11f 100644 (file)
@@ -284,9 +284,22 @@ static int
 find_output(const struct fd3_shader_variant *so, fd3_semantic semantic)
 {
        int j;
+
        for (j = 0; j < so->outputs_count; j++)
                if (so->outputs[j].semantic == semantic)
                        return j;
+
+       /* it seems optional to have a OUT.BCOLOR[n] for each OUT.COLOR[n]
+        * in the vertex shader.. but the fragment shader doesn't know this
+        * so it will always have both IN.COLOR[n] and IN.BCOLOR[n].  So
+        * at link time if there is no matching OUT.BCOLOR[n], we must map
+        * OUT.COLOR[n] to IN.BCOLOR[n].
+        *
+        * Hence: when the loop above found no output for a BCOLOR
+        * semantic, the lookup is retried with the COLOR semantic of the
+        * same index.  If that retry finds nothing either, it ends in the
+        * final 'return 0' (assuming sem2name() reports COLOR for the
+        * semantic built by fd3_semantic_name() -- TODO confirm), so a
+        * missing output is indistinguishable from output index 0.
+        */
+       if (sem2name(semantic) == TGSI_SEMANTIC_BCOLOR) {
+               unsigned idx = sem2idx(semantic);
+               return find_output(so, fd3_semantic_name(TGSI_SEMANTIC_COLOR, idx));
+       }
+
        return 0;
 }
 
index 607a5acbadbaa0cb06804f6f8f9542726ed35502..ffc7eaea53f785fe226e9ce51416cf5b96a4b67f 100644 (file)
@@ -39,6 +39,10 @@ struct fd_lowering_context {
        struct tgsi_transform_context base;
        const struct fd_lowering_config *config;
        struct tgsi_shader_info *info;
+       unsigned two_side_colors;
+       unsigned two_side_idx[PIPE_MAX_SHADER_INPUTS];
+       unsigned color_base;  /* base register for chosen COLOR/BCOLOR's */
+       int face_idx;
        unsigned numtmp;
        struct {
                struct tgsi_full_src_register src;
@@ -977,56 +981,188 @@ transform_dotp(struct tgsi_transform_context *tctx,
        }
 }
 
+
+/* Two-sided color emulation:
+ * For each COLOR input, create a corresponding BCOLOR input, plus
+ * CMP instruction to select front or back color based on FACE
+ */
+#define TWOSIDE_GROW(n)  (                       \
+                       2 +         /* FACE */               \
+                       ((n) * 2) + /* IN[] BCOLOR[n] */     \
+                       ((n) * 1) + /* TEMP[] */             \
+                       ((n) * 5)   /* CMP instr */          \
+               )
+
 static void
-transform_instr(struct tgsi_transform_context *tctx,
-               struct tgsi_full_instruction *inst)
+emit_twoside(struct tgsi_transform_context *tctx)
 {
        struct fd_lowering_context *ctx = fd_lowering_context(tctx);
+       struct tgsi_shader_info *info = ctx->info;
+       struct tgsi_full_declaration decl;
+       struct tgsi_full_instruction new_inst;
+       unsigned inbase, tmpbase;
+       int i;
+
+       inbase  = info->file_max[TGSI_FILE_INPUT] + 1;
+       tmpbase = info->file_max[TGSI_FILE_TEMPORARY] + 1;
+
+       /* additional inputs for BCOLOR's */
+       for (i = 0; i < ctx->two_side_colors; i++) {
+               decl = tgsi_default_full_declaration();
+               decl.Declaration.File = TGSI_FILE_INPUT;
+               decl.Declaration.Semantic = true;
+               decl.Range.First = decl.Range.Last = inbase + i;
+               decl.Semantic.Name = TGSI_SEMANTIC_BCOLOR;
+               decl.Semantic.Index =
+                       info->input_semantic_index[ctx->two_side_idx[i]];
+               tctx->emit_declaration(tctx, &decl);
+       }
 
-       if (!ctx->emitted_decls) {
-               struct tgsi_full_declaration decl;
-               struct tgsi_full_immediate immed;
-               unsigned tmpbase = ctx->info->file_max[TGSI_FILE_TEMPORARY] + 1;
-               int i;
+       /* additional input for FACE */
+       if (ctx->two_side_colors && (ctx->face_idx == -1)) {
+               decl = tgsi_default_full_declaration();
+               decl.Declaration.File = TGSI_FILE_INPUT;
+               decl.Declaration.Semantic = true;
+               decl.Range.First = decl.Range.Last = inbase + ctx->two_side_colors;
+               decl.Semantic.Name = TGSI_SEMANTIC_FACE;
+               decl.Semantic.Index = 0;
+               tctx->emit_declaration(tctx, &decl);
+
+               ctx->face_idx = decl.Range.First;
+       }
 
-               /* declare immediate: */
-               immed = tgsi_default_full_immediate();
-               immed.Immediate.NrTokens = 1 + 4; /* one for the token itself */
-               immed.u[0].Float = 0.0;
-               immed.u[1].Float = 1.0;
-               immed.u[2].Float = 128.0;
-               immed.u[3].Float = 0.0;
-               tctx->emit_immediate(tctx, &immed);
-
-               ctx->imm.Register.File = TGSI_FILE_IMMEDIATE;
-               ctx->imm.Register.Index = ctx->info->immediate_count;
-               ctx->imm.Register.SwizzleX = TGSI_SWIZZLE_X;
-               ctx->imm.Register.SwizzleY = TGSI_SWIZZLE_Y;
-               ctx->imm.Register.SwizzleZ = TGSI_SWIZZLE_Z;
-               ctx->imm.Register.SwizzleW = TGSI_SWIZZLE_W;
-
-               /* declare temp regs: */
-               for (i = 0; i < ctx->numtmp; i++) {
-                       decl = tgsi_default_full_declaration();
-                       decl.Declaration.File = TGSI_FILE_TEMPORARY;
-                       decl.Range.First = decl.Range.Last = tmpbase + i;
-                       tctx->emit_declaration(tctx, &decl);
-
-                       ctx->tmp[i].src.Register.File  = TGSI_FILE_TEMPORARY;
-                       ctx->tmp[i].src.Register.Index = tmpbase + i;
-                       ctx->tmp[i].src.Register.SwizzleX = TGSI_SWIZZLE_X;
-                       ctx->tmp[i].src.Register.SwizzleY = TGSI_SWIZZLE_Y;
-                       ctx->tmp[i].src.Register.SwizzleZ = TGSI_SWIZZLE_Z;
-                       ctx->tmp[i].src.Register.SwizzleW = TGSI_SWIZZLE_W;
-
-                       ctx->tmp[i].dst.Register.File  = TGSI_FILE_TEMPORARY;
-                       ctx->tmp[i].dst.Register.Index = tmpbase + i;
-                       ctx->tmp[i].dst.Register.WriteMask = TGSI_WRITEMASK_XYZW;
+       /* additional temps for COLOR/BCOLOR selection: */
+       for (i = 0; i < ctx->two_side_colors; i++) {
+               decl = tgsi_default_full_declaration();
+               decl.Declaration.File = TGSI_FILE_TEMPORARY;
+               decl.Range.First = decl.Range.Last = tmpbase + ctx->numtmp + i;
+               tctx->emit_declaration(tctx, &decl);
+       }
+
+       /* and finally additional instructions to select COLOR/BCOLOR: */
+       for (i = 0; i < ctx->two_side_colors; i++) {
+               new_inst = tgsi_default_full_instruction();
+               new_inst.Instruction.Opcode = TGSI_OPCODE_CMP;
+
+               new_inst.Instruction.NumDstRegs = 1;
+               new_inst.Dst[0].Register.File  = TGSI_FILE_TEMPORARY;
+               new_inst.Dst[0].Register.Index = tmpbase + ctx->numtmp + i;
+               new_inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW;
+
+               new_inst.Instruction.NumSrcRegs = 3;
+               new_inst.Src[0].Register.File  = TGSI_FILE_INPUT;
+               new_inst.Src[0].Register.Index = ctx->face_idx;
+               new_inst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_X;
+               new_inst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_X;
+               new_inst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_X;
+               new_inst.Src[0].Register.SwizzleW = TGSI_SWIZZLE_X;
+               new_inst.Src[1].Register.File  = TGSI_FILE_INPUT;
+               new_inst.Src[1].Register.Index = inbase + i;
+               new_inst.Src[1].Register.SwizzleX = TGSI_SWIZZLE_X;
+               new_inst.Src[1].Register.SwizzleY = TGSI_SWIZZLE_Y;
+               new_inst.Src[1].Register.SwizzleZ = TGSI_SWIZZLE_Z;
+               new_inst.Src[1].Register.SwizzleW = TGSI_SWIZZLE_W;
+               new_inst.Src[2].Register.File  = TGSI_FILE_INPUT;
+               new_inst.Src[2].Register.Index = ctx->two_side_idx[i];
+               new_inst.Src[2].Register.SwizzleX = TGSI_SWIZZLE_X;
+               new_inst.Src[2].Register.SwizzleY = TGSI_SWIZZLE_Y;
+               new_inst.Src[2].Register.SwizzleZ = TGSI_SWIZZLE_Z;
+               new_inst.Src[2].Register.SwizzleW = TGSI_SWIZZLE_W;
+
+               tctx->emit_instruction(tctx, &new_inst);
+       }
+}
+
+static void
+emit_decls(struct tgsi_transform_context *tctx)
+{      /* One-time prologue: transform_instr() calls this while
+        * ctx->emitted_decls is still unset.  Emits one vec4 immediate and
+        * ctx->numtmp temporary declarations, records ready-made register
+        * descriptors for them in ctx->imm / ctx->tmp[], and finally hands
+        * over to emit_twoside() when COLOR inputs were collected.
+        */
+       struct fd_lowering_context *ctx = fd_lowering_context(tctx);
+       struct tgsi_shader_info *info = ctx->info;
+       struct tgsi_full_declaration decl;
+       struct tgsi_full_immediate immed;
+       unsigned tmpbase;
+       int i;
+
+       tmpbase = info->file_max[TGSI_FILE_TEMPORARY] + 1;  /* first TEMP index added by this pass */
+
+       ctx->color_base = tmpbase + ctx->numtmp;  /* emit_twoside() declares its selection temps from this same index */
+
+       /* declare immediate: a single vec4 holding (0.0, 1.0, 128.0, 0.0).
+        * ctx->imm is then set up as a source register reading it with an
+        * x/y/z/w swizzle; its index is info->immediate_count (presumably
+        * the slot right after the shader's own immediates -- TODO confirm).
+        */
+       immed = tgsi_default_full_immediate();
+       immed.Immediate.NrTokens = 1 + 4; /* one for the token itself */
+       immed.u[0].Float = 0.0;
+       immed.u[1].Float = 1.0;
+       immed.u[2].Float = 128.0;
+       immed.u[3].Float = 0.0;
+       tctx->emit_immediate(tctx, &immed);
+
+       ctx->imm.Register.File = TGSI_FILE_IMMEDIATE;
+       ctx->imm.Register.Index = info->immediate_count;
+       ctx->imm.Register.SwizzleX = TGSI_SWIZZLE_X;
+       ctx->imm.Register.SwizzleY = TGSI_SWIZZLE_Y;
+       ctx->imm.Register.SwizzleZ = TGSI_SWIZZLE_Z;
+       ctx->imm.Register.SwizzleW = TGSI_SWIZZLE_W;
+
+       /* declare temp regs: one TEMP declaration per index in
+        * [tmpbase, tmpbase + numtmp).  For each, ctx->tmp[i] gets a src
+        * register (x/y/z/w swizzle) and a dst register (xyzw writemask)
+        * that both refer to that same index.
+        */
+       for (i = 0; i < ctx->numtmp; i++) {
+               decl = tgsi_default_full_declaration();
+               decl.Declaration.File = TGSI_FILE_TEMPORARY;
+               decl.Range.First = decl.Range.Last = tmpbase + i;
+               tctx->emit_declaration(tctx, &decl);
+
+               ctx->tmp[i].src.Register.File  = TGSI_FILE_TEMPORARY;
+               ctx->tmp[i].src.Register.Index = tmpbase + i;
+               ctx->tmp[i].src.Register.SwizzleX = TGSI_SWIZZLE_X;
+               ctx->tmp[i].src.Register.SwizzleY = TGSI_SWIZZLE_Y;
+               ctx->tmp[i].src.Register.SwizzleZ = TGSI_SWIZZLE_Z;
+               ctx->tmp[i].src.Register.SwizzleW = TGSI_SWIZZLE_W;
+
+               ctx->tmp[i].dst.Register.File  = TGSI_FILE_TEMPORARY;
+               ctx->tmp[i].dst.Register.Index = tmpbase + i;
+               ctx->tmp[i].dst.Register.WriteMask = TGSI_WRITEMASK_XYZW;
+       }
+
+       if (ctx->two_side_colors)  /* number of COLOR inputs recorded in two_side_idx[] */
+               emit_twoside(tctx);
+}
+
+static void
+rename_color_inputs(struct fd_lowering_context *ctx,
+               struct tgsi_full_instruction *inst)
+{      /* Rewrites 'inst' in place: each source operand that reads an
+        * input register listed in ctx->two_side_idx[] is redirected to
+        * the temporary at ctx->color_base + j, where j is the operand's
+        * position in that list.  Other operands are left untouched.
+        */
+       unsigned i, j;  /* i: source operand of inst, j: slot in two_side_idx[] */
+       for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
+               struct tgsi_src_register *src = &inst->Src[i].Register;
+               if (src->File == TGSI_FILE_INPUT) {  /* only reads from the INPUT file are candidates */
+                       for (j = 0; j < ctx->two_side_colors; j++) {
+                               if (src->Index == ctx->two_side_idx[j]) {
+                                       src->File = TGSI_FILE_TEMPORARY;
+                                       src->Index = ctx->color_base + j;  /* same index emit_twoside() uses as dst of the j'th CMP */
+                                       break;  /* first matching slot wins; go on to the next operand */
+                               }
+                       }
                 }
+       }
+
+}
+
+static void
+transform_instr(struct tgsi_transform_context *tctx,
+               struct tgsi_full_instruction *inst)
+{
+       struct fd_lowering_context *ctx = fd_lowering_context(tctx);
 
+       if (!ctx->emitted_decls) {
+               emit_decls(tctx);
                ctx->emitted_decls = 1;
        }
 
+       /* if emulating two-sided-color, we need to re-write some
+        * src registers:
+        */
+       if (ctx->two_side_colors)
+               rename_color_inputs(ctx, inst);
+
        switch (inst->Instruction.Opcode) {
        case TGSI_OPCODE_DST:
                if (!ctx->config->lower_DST)
@@ -1125,6 +1261,22 @@ fd_transform_lowering(const struct fd_lowering_config *config,
 
        tgsi_scan_shader(tokens, info);
 
+       /* if we are adding fragment shader support to emulate two-sided
+        * color, then figure out the number of additional inputs we need
+        * to create for BCOLOR's..
+        */
+       if ((info->processor == TGSI_PROCESSOR_FRAGMENT) &&
+                       config->color_two_side) {
+               int i;
+               ctx.face_idx = -1;
+               for (i = 0; i <= info->file_max[TGSI_FILE_INPUT]; i++) {
+                       if (info->input_semantic_name[i] == TGSI_SEMANTIC_COLOR)
+                               ctx.two_side_idx[ctx.two_side_colors++] = i;
+                       if (info->input_semantic_name[i] == TGSI_SEMANTIC_FACE)
+                               ctx.face_idx = i;
+               }
+       }
+
 #define OPCS(x) ((config->lower_ ## x) ? info->opcode_count[TGSI_OPCODE_ ## x] : 0)
        /* if there are no instructions to lower, then we are done: */
        if (!(OPCS(DST) ||
@@ -1140,7 +1292,8 @@ fd_transform_lowering(const struct fd_lowering_config *config,
                        OPCS(DP3) ||
                        OPCS(DPH) ||
                        OPCS(DP2) ||
-                       OPCS(DP2A)))
+                       OPCS(DP2A) ||
+                       ctx.two_side_colors))
                return NULL;
 
 #if 0  /* debug */
@@ -1207,8 +1360,18 @@ fd_transform_lowering(const struct fd_lowering_config *config,
                numtmp = MAX2(numtmp, DOTP_TMP);
        }
 
+       /* specifically don't include two_side_colors temps in the count: */
        ctx.numtmp = numtmp;
 
+       if (ctx.two_side_colors) {
+               newlen += TWOSIDE_GROW(ctx.two_side_colors);
+               /* note: we permanently consume temp regs, re-writing references
+                * to IN.COLOR[n] to TEMP[m] (holding the output of the CMP
+                * instruction that selects which varying to use):
+                */
+               numtmp += ctx.two_side_colors;
+       }
+
        newlen += 2 * numtmp;
        newlen += 5;        /* immediate */
 
index 2862e5d3b6b313c6e555cd67ecb5a78b2dfbe97c..2d36d8faf8177329e4f071adf739c346d79e702f 100644 (file)
 #include "tgsi/tgsi_scan.h"
 
 struct fd_lowering_config {
+       /* For fragment shaders, generate a shader that emulates two
+        * sided color by inserting a BCOLOR input for each COLOR
+        * input, and insert a CMP instruction to select the correct
+        * color to use based on the TGSI_SEMANTIC_FACE input.
+        *
+        * Note that drivers which use this to emulate two sided color
+        * will:
+        *  a) need to generate (on demand) alternate shaders to use
+        *     depending on the rasterizer state (ie. whether two
+        *     sided shading is enabled)
+        *  b) expect to see the BCOLOR semantic name in fragment
+        *     shaders.  During linkage, the driver should simply
+        *     map VS.OUT.BCOLOR[n] to FS.IN.BCOLOR[n] (in the
+        *     same way as linking other outs/ins).
+        */
+       unsigned color_two_side : 1;
+
+       /* TODO support for alpha_to_one as well?? */
+
        /* Individual OPC lowerings, if lower_<opc> is TRUE then
         * enable lowering of TGSI_OPCODE_<opc>
         */