From 26530716ab9398703f91285381033073f47e8bd4 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Sun, 23 Feb 2014 14:40:41 -0500 Subject: [PATCH] freedreno/lowering: two-sided-color Add option to generate fragment shader to emulate two sided color. Additional inputs are added to shader for BCOLOR's (one corresponding to each COLOR input). CMP instructions are used to select whether to use COLOR or BCOLOR. Signed-off-by: Rob Clark --- .../drivers/freedreno/a3xx/fd3_compiler.c | 47 ++-- .../drivers/freedreno/a3xx/fd3_compiler_old.c | 2 + .../drivers/freedreno/a3xx/fd3_program.c | 13 + .../drivers/freedreno/freedreno_lowering.c | 245 +++++++++++++++--- .../drivers/freedreno/freedreno_lowering.h | 19 ++ 5 files changed, 269 insertions(+), 57 deletions(-) diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c index ae88d6ef54d..7450fac23c1 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c @@ -152,6 +152,7 @@ compile_init(struct fd3_compile_context *ctx, struct fd3_shader_variant *so, unsigned ret; struct tgsi_shader_info *info = &ctx->info; const struct fd_lowering_config lconfig = { + .color_two_side = so->key.color_two_side, .lower_DST = true, .lower_XPD = true, .lower_SCS = true, @@ -2003,6 +2004,7 @@ decl_out(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl) so->writes_psize = true; break; case TGSI_SEMANTIC_COLOR: + case TGSI_SEMANTIC_BCOLOR: case TGSI_SEMANTIC_GENERIC: case TGSI_SEMANTIC_FOG: case TGSI_SEMANTIC_TEXCOORD: @@ -2059,24 +2061,31 @@ fixup_frag_inputs(struct fd3_compile_context *ctx) { struct fd3_shader_variant *so = ctx->so; struct ir3_block *block = ctx->block; + struct ir3_instruction **inputs; struct ir3_instruction *instr; - int regid = 0; + int n, regid = 0; block->ninputs = 0; + n = 4; /* always have frag_pos */ + n += COND(so->frag_face, 4); + n += COND(so->frag_coord, 4); + + inputs = ir3_alloc(ctx->ir, 
n * (sizeof(struct ir3_instruction *))); + if (so->frag_face) { /* this ultimately gets assigned to hr0.x so doesn't conflict * with frag_coord/frag_pos.. */ - block->inputs[block->ninputs++] = ctx->frag_face; + inputs[block->ninputs++] = ctx->frag_face; ctx->frag_face->regs[0]->num = 0; /* remaining channels not used, but let's avoid confusing * other parts that expect inputs to come in groups of vec4 */ - block->inputs[block->ninputs++] = NULL; - block->inputs[block->ninputs++] = NULL; - block->inputs[block->ninputs++] = NULL; + inputs[block->ninputs++] = NULL; + inputs[block->ninputs++] = NULL; + inputs[block->ninputs++] = NULL; } /* since we don't know where to set the regid for frag_coord, @@ -2090,10 +2099,10 @@ fixup_frag_inputs(struct fd3_compile_context *ctx) ctx->frag_coord[2]->regs[0]->num = regid++; ctx->frag_coord[3]->regs[0]->num = regid++; - block->inputs[block->ninputs++] = ctx->frag_coord[0]; - block->inputs[block->ninputs++] = ctx->frag_coord[1]; - block->inputs[block->ninputs++] = ctx->frag_coord[2]; - block->inputs[block->ninputs++] = ctx->frag_coord[3]; + inputs[block->ninputs++] = ctx->frag_coord[0]; + inputs[block->ninputs++] = ctx->frag_coord[1]; + inputs[block->ninputs++] = ctx->frag_coord[2]; + inputs[block->ninputs++] = ctx->frag_coord[3]; } /* we always have frag_pos: */ @@ -2102,14 +2111,16 @@ fixup_frag_inputs(struct fd3_compile_context *ctx) /* r0.x */ instr = create_input(block, NULL, block->ninputs); instr->regs[0]->num = regid++; - block->inputs[block->ninputs++] = instr; + inputs[block->ninputs++] = instr; ctx->frag_pos->regs[1]->instr = instr; /* r0.y */ instr = create_input(block, NULL, block->ninputs); instr->regs[0]->num = regid++; - block->inputs[block->ninputs++] = instr; + inputs[block->ninputs++] = instr; ctx->frag_pos->regs[2]->instr = instr; + + block->inputs = inputs; } static void @@ -2189,10 +2200,6 @@ compile_instructions(struct fd3_compile_context *ctx) break; } } - - /* fixup actual inputs for frag shader: */ - if 
(ctx->type == TGSI_PROCESSOR_FRAGMENT) - fixup_frag_inputs(ctx); } static void @@ -2217,6 +2224,7 @@ fd3_compile_shader(struct fd3_shader_variant *so, { struct fd3_compile_context ctx; struct ir3_block *block; + struct ir3_instruction **inputs; unsigned i, j, actual_in; int ret = 0; @@ -2235,6 +2243,13 @@ fd3_compile_shader(struct fd3_shader_variant *so, block = ctx.block; + /* keep track of the inputs from TGSI perspective.. */ + inputs = block->inputs; + + /* but fixup actual inputs for frag shader: */ + if (ctx.type == TGSI_PROCESSOR_FRAGMENT) + fixup_frag_inputs(&ctx); + /* at this point, for binning pass, throw away unneeded outputs: */ if (key.binning_pass) { for (i = 0, j = 0; i < so->outputs_count; i++) { @@ -2320,7 +2335,7 @@ fd3_compile_shader(struct fd3_shader_variant *so, for (i = 0; i < so->inputs_count; i++) { unsigned j, regid = ~0, compmask = 0; for (j = 0; j < 4; j++) { - struct ir3_instruction *in = block->inputs[(i*4) + j]; + struct ir3_instruction *in = inputs[(i*4) + j]; if (in) { compmask |= (1 << j); regid = in->regs[0]->num - j; diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler_old.c b/src/gallium/drivers/freedreno/a3xx/fd3_compiler_old.c index 9a0bbb5edff..76de287b163 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_compiler_old.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_compiler_old.c @@ -126,6 +126,7 @@ compile_init(struct fd3_compile_context *ctx, struct fd3_shader_variant *so, unsigned ret, base = 0; struct tgsi_shader_info *info = &ctx->info; const struct fd_lowering_config lconfig = { + .color_two_side = so->key.color_two_side, .lower_DST = true, .lower_XPD = true, .lower_SCS = true, @@ -1383,6 +1384,7 @@ decl_out(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl) so->writes_psize = true; break; case TGSI_SEMANTIC_COLOR: + case TGSI_SEMANTIC_BCOLOR: case TGSI_SEMANTIC_GENERIC: case TGSI_SEMANTIC_FOG: case TGSI_SEMANTIC_TEXCOORD: diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.c 
b/src/gallium/drivers/freedreno/a3xx/fd3_program.c index 4cdd9387f9d..a84351ae887 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_program.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.c @@ -284,9 +284,22 @@ static int find_output(const struct fd3_shader_variant *so, fd3_semantic semantic) { int j; + for (j = 0; j < so->outputs_count; j++) if (so->outputs[j].semantic == semantic) return j; + + /* it seems optional to have a OUT.BCOLOR[n] for each OUT.COLOR[n] + * in the vertex shader.. but the fragment shader doesn't know this + * so it will always have both IN.COLOR[n] and IN.BCOLOR[n]. So + * at link time if there is no matching OUT.BCOLOR[n], we must map + * OUT.COLOR[n] to IN.BCOLOR[n]. + */ + if (sem2name(semantic) == TGSI_SEMANTIC_BCOLOR) { + unsigned idx = sem2idx(semantic); + return find_output(so, fd3_semantic_name(TGSI_SEMANTIC_COLOR, idx)); + } + return 0; } diff --git a/src/gallium/drivers/freedreno/freedreno_lowering.c b/src/gallium/drivers/freedreno/freedreno_lowering.c index 607a5acbadb..ffc7eaea53f 100644 --- a/src/gallium/drivers/freedreno/freedreno_lowering.c +++ b/src/gallium/drivers/freedreno/freedreno_lowering.c @@ -39,6 +39,10 @@ struct fd_lowering_context { struct tgsi_transform_context base; const struct fd_lowering_config *config; struct tgsi_shader_info *info; + unsigned two_side_colors; + unsigned two_side_idx[PIPE_MAX_SHADER_INPUTS]; + unsigned color_base; /* base register for chosen COLOR/BCOLOR's */ + int face_idx; unsigned numtmp; struct { struct tgsi_full_src_register src; @@ -977,56 +981,188 @@ transform_dotp(struct tgsi_transform_context *tctx, } } + +/* Two-sided color emulation: + * For each COLOR input, create a corresponding BCOLOR input, plus + * CMP instruction to select front or back color based on FACE + */ +#define TWOSIDE_GROW(n) ( \ + 2 + /* FACE */ \ + ((n) * 2) + /* IN[] BCOLOR[n] */ \ + ((n) * 1) + /* TEMP[] */ \ + ((n) * 5) /* CMP instr */ \ + ) + static void -transform_instr(struct tgsi_transform_context 
*tctx, - struct tgsi_full_instruction *inst) +emit_twoside(struct tgsi_transform_context *tctx) { struct fd_lowering_context *ctx = fd_lowering_context(tctx); + struct tgsi_shader_info *info = ctx->info; + struct tgsi_full_declaration decl; + struct tgsi_full_instruction new_inst; + unsigned inbase, tmpbase; + int i; + + inbase = info->file_max[TGSI_FILE_INPUT] + 1; + tmpbase = info->file_max[TGSI_FILE_TEMPORARY] + 1; + + /* additional inputs for BCOLOR's */ + for (i = 0; i < ctx->two_side_colors; i++) { + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_INPUT; + decl.Declaration.Semantic = true; + decl.Range.First = decl.Range.Last = inbase + i; + decl.Semantic.Name = TGSI_SEMANTIC_BCOLOR; + decl.Semantic.Index = + info->input_semantic_index[ctx->two_side_idx[i]]; + tctx->emit_declaration(tctx, &decl); + } - if (!ctx->emitted_decls) { - struct tgsi_full_declaration decl; - struct tgsi_full_immediate immed; - unsigned tmpbase = ctx->info->file_max[TGSI_FILE_TEMPORARY] + 1; - int i; + /* additional input for FACE */ + if (ctx->two_side_colors && (ctx->face_idx == -1)) { + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_INPUT; + decl.Declaration.Semantic = true; + decl.Range.First = decl.Range.Last = inbase + ctx->two_side_colors; + decl.Semantic.Name = TGSI_SEMANTIC_FACE; + decl.Semantic.Index = 0; + tctx->emit_declaration(tctx, &decl); + + ctx->face_idx = decl.Range.First; + } - /* declare immediate: */ - immed = tgsi_default_full_immediate(); - immed.Immediate.NrTokens = 1 + 4; /* one for the token itself */ - immed.u[0].Float = 0.0; - immed.u[1].Float = 1.0; - immed.u[2].Float = 128.0; - immed.u[3].Float = 0.0; - tctx->emit_immediate(tctx, &immed); - - ctx->imm.Register.File = TGSI_FILE_IMMEDIATE; - ctx->imm.Register.Index = ctx->info->immediate_count; - ctx->imm.Register.SwizzleX = TGSI_SWIZZLE_X; - ctx->imm.Register.SwizzleY = TGSI_SWIZZLE_Y; - ctx->imm.Register.SwizzleZ = TGSI_SWIZZLE_Z; - 
ctx->imm.Register.SwizzleW = TGSI_SWIZZLE_W; - - /* declare temp regs: */ - for (i = 0; i < ctx->numtmp; i++) { - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_TEMPORARY; - decl.Range.First = decl.Range.Last = tmpbase + i; - tctx->emit_declaration(tctx, &decl); - - ctx->tmp[i].src.Register.File = TGSI_FILE_TEMPORARY; - ctx->tmp[i].src.Register.Index = tmpbase + i; - ctx->tmp[i].src.Register.SwizzleX = TGSI_SWIZZLE_X; - ctx->tmp[i].src.Register.SwizzleY = TGSI_SWIZZLE_Y; - ctx->tmp[i].src.Register.SwizzleZ = TGSI_SWIZZLE_Z; - ctx->tmp[i].src.Register.SwizzleW = TGSI_SWIZZLE_W; - - ctx->tmp[i].dst.Register.File = TGSI_FILE_TEMPORARY; - ctx->tmp[i].dst.Register.Index = tmpbase + i; - ctx->tmp[i].dst.Register.WriteMask = TGSI_WRITEMASK_XYZW; + /* additional temps for COLOR/BCOLOR selection: */ + for (i = 0; i < ctx->two_side_colors; i++) { + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_TEMPORARY; + decl.Range.First = decl.Range.Last = tmpbase + ctx->numtmp + i; + tctx->emit_declaration(tctx, &decl); + } + + /* and finally additional instructions to select COLOR/BCOLOR: */ + for (i = 0; i < ctx->two_side_colors; i++) { + new_inst = tgsi_default_full_instruction(); + new_inst.Instruction.Opcode = TGSI_OPCODE_CMP; + + new_inst.Instruction.NumDstRegs = 1; + new_inst.Dst[0].Register.File = TGSI_FILE_TEMPORARY; + new_inst.Dst[0].Register.Index = tmpbase + ctx->numtmp + i; + new_inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW; + + new_inst.Instruction.NumSrcRegs = 3; + new_inst.Src[0].Register.File = TGSI_FILE_INPUT; + new_inst.Src[0].Register.Index = ctx->face_idx; + new_inst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_X; + new_inst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_X; + new_inst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_X; + new_inst.Src[0].Register.SwizzleW = TGSI_SWIZZLE_X; + new_inst.Src[1].Register.File = TGSI_FILE_INPUT; + new_inst.Src[1].Register.Index = inbase + i; + new_inst.Src[1].Register.SwizzleX = 
TGSI_SWIZZLE_X; + new_inst.Src[1].Register.SwizzleY = TGSI_SWIZZLE_Y; + new_inst.Src[1].Register.SwizzleZ = TGSI_SWIZZLE_Z; + new_inst.Src[1].Register.SwizzleW = TGSI_SWIZZLE_W; + new_inst.Src[2].Register.File = TGSI_FILE_INPUT; + new_inst.Src[2].Register.Index = ctx->two_side_idx[i]; + new_inst.Src[2].Register.SwizzleX = TGSI_SWIZZLE_X; + new_inst.Src[2].Register.SwizzleY = TGSI_SWIZZLE_Y; + new_inst.Src[2].Register.SwizzleZ = TGSI_SWIZZLE_Z; + new_inst.Src[2].Register.SwizzleW = TGSI_SWIZZLE_W; + + tctx->emit_instruction(tctx, &new_inst); + } +} + +static void +emit_decls(struct tgsi_transform_context *tctx) +{ + struct fd_lowering_context *ctx = fd_lowering_context(tctx); + struct tgsi_shader_info *info = ctx->info; + struct tgsi_full_declaration decl; + struct tgsi_full_immediate immed; + unsigned tmpbase; + int i; + + tmpbase = info->file_max[TGSI_FILE_TEMPORARY] + 1; + + ctx->color_base = tmpbase + ctx->numtmp; + + /* declare immediate: */ + immed = tgsi_default_full_immediate(); + immed.Immediate.NrTokens = 1 + 4; /* one for the token itself */ + immed.u[0].Float = 0.0; + immed.u[1].Float = 1.0; + immed.u[2].Float = 128.0; + immed.u[3].Float = 0.0; + tctx->emit_immediate(tctx, &immed); + + ctx->imm.Register.File = TGSI_FILE_IMMEDIATE; + ctx->imm.Register.Index = info->immediate_count; + ctx->imm.Register.SwizzleX = TGSI_SWIZZLE_X; + ctx->imm.Register.SwizzleY = TGSI_SWIZZLE_Y; + ctx->imm.Register.SwizzleZ = TGSI_SWIZZLE_Z; + ctx->imm.Register.SwizzleW = TGSI_SWIZZLE_W; + + /* declare temp regs: */ + for (i = 0; i < ctx->numtmp; i++) { + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_TEMPORARY; + decl.Range.First = decl.Range.Last = tmpbase + i; + tctx->emit_declaration(tctx, &decl); + + ctx->tmp[i].src.Register.File = TGSI_FILE_TEMPORARY; + ctx->tmp[i].src.Register.Index = tmpbase + i; + ctx->tmp[i].src.Register.SwizzleX = TGSI_SWIZZLE_X; + ctx->tmp[i].src.Register.SwizzleY = TGSI_SWIZZLE_Y; + ctx->tmp[i].src.Register.SwizzleZ = 
TGSI_SWIZZLE_Z; + ctx->tmp[i].src.Register.SwizzleW = TGSI_SWIZZLE_W; + + ctx->tmp[i].dst.Register.File = TGSI_FILE_TEMPORARY; + ctx->tmp[i].dst.Register.Index = tmpbase + i; + ctx->tmp[i].dst.Register.WriteMask = TGSI_WRITEMASK_XYZW; + } + + if (ctx->two_side_colors) + emit_twoside(tctx); +} + +static void +rename_color_inputs(struct fd_lowering_context *ctx, + struct tgsi_full_instruction *inst) +{ + unsigned i, j; + for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { + struct tgsi_src_register *src = &inst->Src[i].Register; + if (src->File == TGSI_FILE_INPUT) { + for (j = 0; j < ctx->two_side_colors; j++) { + if (src->Index == ctx->two_side_idx[j]) { + src->File = TGSI_FILE_TEMPORARY; + src->Index = ctx->color_base + j; + break; + } + } } + } + +} + +static void +transform_instr(struct tgsi_transform_context *tctx, + struct tgsi_full_instruction *inst) +{ + struct fd_lowering_context *ctx = fd_lowering_context(tctx); + if (!ctx->emitted_decls) { + emit_decls(tctx); ctx->emitted_decls = 1; } + /* if emulating two-sided-color, we need to re-write some + * src registers: + */ + if (ctx->two_side_colors) + rename_color_inputs(ctx, inst); + switch (inst->Instruction.Opcode) { case TGSI_OPCODE_DST: if (!ctx->config->lower_DST) @@ -1125,6 +1261,22 @@ fd_transform_lowering(const struct fd_lowering_config *config, tgsi_scan_shader(tokens, info); + /* if we are adding fragment shader support to emulate two-sided + * color, then figure out the number of additional inputs we need + * to create for BCOLOR's.. + */ + if ((info->processor == TGSI_PROCESSOR_FRAGMENT) && + config->color_two_side) { + int i; + ctx.face_idx = -1; + for (i = 0; i <= info->file_max[TGSI_FILE_INPUT]; i++) { + if (info->input_semantic_name[i] == TGSI_SEMANTIC_COLOR) + ctx.two_side_idx[ctx.two_side_colors++] = i; + if (info->input_semantic_name[i] == TGSI_SEMANTIC_FACE) + ctx.face_idx = i; + } + } + #define OPCS(x) ((config->lower_ ## x) ? 
info->opcode_count[TGSI_OPCODE_ ## x] : 0) /* if there are no instructions to lower, then we are done: */ if (!(OPCS(DST) || @@ -1140,7 +1292,8 @@ fd_transform_lowering(const struct fd_lowering_config *config, OPCS(DP3) || OPCS(DPH) || OPCS(DP2) || - OPCS(DP2A))) + OPCS(DP2A) || + ctx.two_side_colors)) return NULL; #if 0 /* debug */ @@ -1207,8 +1360,18 @@ fd_transform_lowering(const struct fd_lowering_config *config, numtmp = MAX2(numtmp, DOTP_TMP); } + /* specifically don't include two_side_colors temps in the count: */ ctx.numtmp = numtmp; + if (ctx.two_side_colors) { + newlen += TWOSIDE_GROW(ctx.two_side_colors); + /* note: we permanently consume temp regs, re-writing references + * to IN.COLOR[n] to TEMP[m] (holding the output of the CMP + * instruction that selects which varying to use): + */ + numtmp += ctx.two_side_colors; + } + newlen += 2 * numtmp; newlen += 5; /* immediate */ diff --git a/src/gallium/drivers/freedreno/freedreno_lowering.h b/src/gallium/drivers/freedreno/freedreno_lowering.h index 2862e5d3b6b..2d36d8faf81 100644 --- a/src/gallium/drivers/freedreno/freedreno_lowering.h +++ b/src/gallium/drivers/freedreno/freedreno_lowering.h @@ -33,6 +33,25 @@ #include "tgsi/tgsi_scan.h" struct fd_lowering_config { + /* For fragment shaders, generate a shader that emulates two + * sided color by inserting a BCOLOR input for each COLOR + * input, and insert a CMP instruction to select the correct + * color to use based on the TGSI_SEMANTIC_FACE input. + * + * Note that drivers which use this to emulate two sided color + * will: + * a) need to generate (on demand) alternate shaders to use + * depending on the rasterizer state (ie. whether two + * sided shading is enabled) + * b) expect to see the BCOLOR semantic name in fragment + * shaders. During linkage, the driver should simply + * map VS.OUT.BCOLOR[n] to FS.IN.BCOLOR[n] (in the + * same way as linking other outs/ins). + */ + unsigned color_two_side : 1; + + /* TODO support for alpha_to_one as well?? 
*/ + /* Individual OPC lowerings, if lower_<opc> is TRUE then * enable lowering of TGSI_OPCODE_<opc> */ -- 2.30.2