From abb436526974bd090853c0927ece0839f9143393 Mon Sep 17 00:00:00 2001 From: =?utf8?q?St=C3=A9phane=20Marchesin?= Date: Mon, 6 Jun 2011 20:40:16 -0700 Subject: [PATCH] i915g: Do generic remapping. With complex shaders there are often "holes" in the fs inputs, and we only have 8 tex coorsd to map those to. To fix this, we remap fs inputs to [0..8]. This lets us to run many more GLSL programs. --- src/gallium/drivers/i915/i915_context.h | 6 + src/gallium/drivers/i915/i915_fpc_translate.c | 170 ++++++++++-------- src/gallium/drivers/i915/i915_state_derived.c | 35 ++-- 3 files changed, 119 insertions(+), 92 deletions(-) diff --git a/src/gallium/drivers/i915/i915_context.h b/src/gallium/drivers/i915/i915_context.h index dacf50e870d..964948edc0e 100644 --- a/src/gallium/drivers/i915/i915_context.h +++ b/src/gallium/drivers/i915/i915_context.h @@ -124,6 +124,12 @@ struct i915_fragment_shader * Else, the bitmask indicates which components are occupied by immediates. */ ubyte constant_flags[I915_MAX_CONSTANT]; + + /** + * The mapping between generics and hw texture coords. + * We need to share this between the vertex and fragment stages. + **/ + int generic_mapping[I915_TEX_UNITS]; }; diff --git a/src/gallium/drivers/i915/i915_fpc_translate.c b/src/gallium/drivers/i915/i915_fpc_translate.c index 51766cd6da1..27f100843bf 100644 --- a/src/gallium/drivers/i915/i915_fpc_translate.c +++ b/src/gallium/drivers/i915/i915_fpc_translate.c @@ -133,7 +133,21 @@ i915_program_error(struct i915_fp_compile *p, const char *msg, ...) p->error = 1; } - +static uint get_mapping(struct i915_fragment_shader* fs, int unit) +{ + int i; + for (i = 0; i < I915_TEX_UNITS; i++) + { + if (fs->generic_mapping[i] == -1) { + fs->generic_mapping[i] = unit; + return i; + } + if (fs->generic_mapping[i] == unit) + return i; + } + debug_printf("Exceeded max generics\n"); + return 0; +} /** * Construct a ureg for the given source register. Will emit @@ -141,7 +155,8 @@ i915_program_error(struct i915_fp_compile *p, const char *msg, ...) */ static uint src_vector(struct i915_fp_compile *p, - const struct tgsi_full_src_register *source) + const struct tgsi_full_src_register *source, + struct i915_fragment_shader* fs) { uint index = source->Register.Index; uint src = 0, sem_name, sem_ind; @@ -192,15 +207,11 @@ src_vector(struct i915_fp_compile *p, src = swizzle(src, W, W, W, W); break; case TGSI_SEMANTIC_GENERIC: - if (sem_ind < 8) - /* a texcoord */ - src = i915_emit_decl(p, REG_TYPE_T, T_TEX0 + sem_ind, D0_CHANNEL_ALL); - else if ( (sem_ind >= 10) && (sem_ind < 18) ) - /* a varying */ - src = i915_emit_decl(p, REG_TYPE_T, T_TEX0 + sem_ind - 10, D0_CHANNEL_ALL); - else - debug_printf("%s: unhandled generic %d\n", __func__, sem_ind); - break; + { + int real_tex_unit = get_mapping(fs, sem_ind); + src = i915_emit_decl(p, REG_TYPE_T, T_TEX0 + real_tex_unit, D0_CHANNEL_ALL); + break; + } default: i915_program_error(p, "Bad source->Index"); return 0; @@ -342,13 +353,14 @@ translate_tex_src_target(struct i915_fp_compile *p, uint tex) static void emit_tex(struct i915_fp_compile *p, const struct tgsi_full_instruction *inst, - uint opcode) + uint opcode, + struct i915_fragment_shader* fs) { uint texture = inst->Texture.Texture; uint unit = inst->Src[1].Register.Index; uint tex = translate_tex_src_target( p, texture ); uint sampler = i915_emit_decl(p, REG_TYPE_S, unit, tex); - uint coord = src_vector( p, &inst->Src[0]); + uint coord = src_vector( p, &inst->Src[0], fs); i915_emit_texld( p, get_result_vector( p, &inst->Dst[0] ), @@ -367,15 +379,16 @@ emit_tex(struct i915_fp_compile *p, static void emit_simple_arith(struct i915_fp_compile *p, const struct tgsi_full_instruction *inst, - uint opcode, uint numArgs) + uint opcode, uint numArgs, + struct i915_fragment_shader* fs) { uint arg1, arg2, arg3; assert(numArgs <= 3); - arg1 = (numArgs < 1) ? 0 : src_vector( p, &inst->Src[0] ); - arg2 = (numArgs < 2) ? 0 : src_vector( p, &inst->Src[1] ); - arg3 = (numArgs < 3) ? 0 : src_vector( p, &inst->Src[2] ); + arg1 = (numArgs < 1) ? 0 : src_vector( p, &inst->Src[0], fs ); + arg2 = (numArgs < 2) ? 0 : src_vector( p, &inst->Src[1], fs ); + arg3 = (numArgs < 3) ? 0 : src_vector( p, &inst->Src[2], fs ); i915_emit_arith( p, opcode, @@ -391,7 +404,8 @@ emit_simple_arith(struct i915_fp_compile *p, static void emit_simple_arith_swap2(struct i915_fp_compile *p, const struct tgsi_full_instruction *inst, - uint opcode, uint numArgs) + uint opcode, uint numArgs, + struct i915_fragment_shader* fs) { struct tgsi_full_instruction inst2; @@ -402,7 +416,7 @@ emit_simple_arith_swap2(struct i915_fp_compile *p, inst2.Src[0] = inst->Src[1]; inst2.Src[1] = inst->Src[0]; - emit_simple_arith(p, &inst2, opcode, numArgs); + emit_simple_arith(p, &inst2, opcode, numArgs, fs); } @@ -421,7 +435,8 @@ emit_simple_arith_swap2(struct i915_fp_compile *p, */ static void i915_translate_instruction(struct i915_fp_compile *p, - const struct tgsi_full_instruction *inst) + const struct tgsi_full_instruction *inst, + struct i915_fragment_shader *fs) { uint writemask; uint src0, src1, src2, flags; @@ -429,7 +444,7 @@ i915_translate_instruction(struct i915_fp_compile *p, switch (inst->Instruction.Opcode) { case TGSI_OPCODE_ABS: - src0 = src_vector(p, &inst->Src[0]); + src0 = src_vector(p, &inst->Src[0], fs); i915_emit_arith(p, A0_MAX, get_result_vector(p, &inst->Dst[0]), @@ -438,13 +453,13 @@ i915_translate_instruction(struct i915_fp_compile *p, break; case TGSI_OPCODE_ADD: - emit_simple_arith(p, inst, A0_ADD, 2); + emit_simple_arith(p, inst, A0_ADD, 2, fs); break; case TGSI_OPCODE_CMP: - src0 = src_vector(p, &inst->Src[0]); - src1 = src_vector(p, &inst->Src[1]); - src2 = src_vector(p, &inst->Src[2]); + src0 = src_vector(p, &inst->Src[0], fs); + src1 = src_vector(p, &inst->Src[1], fs); + src2 = src_vector(p, &inst->Src[2], fs); i915_emit_arith(p, A0_CMP, get_result_vector(p, &inst->Dst[0]), get_result_flags(inst), @@ -452,7 +467,7 @@ i915_translate_instruction(struct i915_fp_compile *p, break; case TGSI_OPCODE_COS: - src0 = src_vector(p, &inst->Src[0]); + src0 = src_vector(p, &inst->Src[0], fs); tmp = i915_get_utemp(p); i915_emit_arith(p, @@ -502,8 +517,8 @@ i915_translate_instruction(struct i915_fp_compile *p, break; case TGSI_OPCODE_DP2: - src0 = src_vector(p, &inst->Src[0]); - src1 = src_vector(p, &inst->Src[1]); + src0 = src_vector(p, &inst->Src[0], fs); + src1 = src_vector(p, &inst->Src[1], fs); i915_emit_arith(p, A0_DP3, @@ -513,16 +528,16 @@ i915_translate_instruction(struct i915_fp_compile *p, break; case TGSI_OPCODE_DP3: - emit_simple_arith(p, inst, A0_DP3, 2); + emit_simple_arith(p, inst, A0_DP3, 2, fs); break; case TGSI_OPCODE_DP4: - emit_simple_arith(p, inst, A0_DP4, 2); + emit_simple_arith(p, inst, A0_DP4, 2, fs); break; case TGSI_OPCODE_DPH: - src0 = src_vector(p, &inst->Src[0]); - src1 = src_vector(p, &inst->Src[1]); + src0 = src_vector(p, &inst->Src[0], fs); + src1 = src_vector(p, &inst->Src[1], fs); i915_emit_arith(p, A0_DP4, @@ -532,8 +547,8 @@ i915_translate_instruction(struct i915_fp_compile *p, break; case TGSI_OPCODE_DST: - src0 = src_vector(p, &inst->Src[0]); - src1 = src_vector(p, &inst->Src[1]); + src0 = src_vector(p, &inst->Src[0], fs); + src1 = src_vector(p, &inst->Src[1], fs); /* result[0] = 1 * 1; * result[1] = a[1] * b[1]; @@ -553,7 +568,7 @@ i915_translate_instruction(struct i915_fp_compile *p, break; case TGSI_OPCODE_EX2: - src0 = src_vector(p, &inst->Src[0]); + src0 = src_vector(p, &inst->Src[0], fs); i915_emit_arith(p, A0_EXP, @@ -563,16 +578,16 @@ i915_translate_instruction(struct i915_fp_compile *p, break; case TGSI_OPCODE_FLR: - emit_simple_arith(p, inst, A0_FLR, 1); + emit_simple_arith(p, inst, A0_FLR, 1, fs); break; case TGSI_OPCODE_FRC: - emit_simple_arith(p, inst, A0_FRC, 1); + emit_simple_arith(p, inst, A0_FRC, 1, fs); break; case TGSI_OPCODE_KIL: /* kill if src[0].x < 0 || src[0].y < 0 ... */ - src0 = src_vector(p, &inst->Src[0]); + src0 = src_vector(p, &inst->Src[0], fs); tmp = i915_get_utemp(p); i915_emit_texld(p, @@ -588,7 +603,7 @@ i915_translate_instruction(struct i915_fp_compile *p, break; case TGSI_OPCODE_LG2: - src0 = src_vector(p, &inst->Src[0]); + src0 = src_vector(p, &inst->Src[0], fs); i915_emit_arith(p, A0_LOG, @@ -598,7 +613,7 @@ i915_translate_instruction(struct i915_fp_compile *p, break; case TGSI_OPCODE_LIT: - src0 = src_vector(p, &inst->Src[0]); + src0 = src_vector(p, &inst->Src[0], fs); tmp = i915_get_utemp(p); /* tmp = max( a.xyzw, a.00zw ) @@ -631,9 +646,9 @@ i915_translate_instruction(struct i915_fp_compile *p, break; case TGSI_OPCODE_LRP: - src0 = src_vector(p, &inst->Src[0]); - src1 = src_vector(p, &inst->Src[1]); - src2 = src_vector(p, &inst->Src[2]); + src0 = src_vector(p, &inst->Src[0], fs); + src1 = src_vector(p, &inst->Src[1], fs); + src2 = src_vector(p, &inst->Src[2], fs); flags = get_result_flags(inst); tmp = i915_get_utemp(p); @@ -653,16 +668,16 @@ i915_translate_instruction(struct i915_fp_compile *p, break; case TGSI_OPCODE_MAD: - emit_simple_arith(p, inst, A0_MAD, 3); + emit_simple_arith(p, inst, A0_MAD, 3, fs); break; case TGSI_OPCODE_MAX: - emit_simple_arith(p, inst, A0_MAX, 2); + emit_simple_arith(p, inst, A0_MAX, 2, fs); break; case TGSI_OPCODE_MIN: - src0 = src_vector(p, &inst->Src[0]); - src1 = src_vector(p, &inst->Src[1]); + src0 = src_vector(p, &inst->Src[0], fs); + src1 = src_vector(p, &inst->Src[1], fs); tmp = i915_get_utemp(p); flags = get_result_flags(inst); @@ -679,16 +694,16 @@ i915_translate_instruction(struct i915_fp_compile *p, break; case TGSI_OPCODE_MOV: - emit_simple_arith(p, inst, A0_MOV, 1); + emit_simple_arith(p, inst, A0_MOV, 1, fs); break; case TGSI_OPCODE_MUL: - emit_simple_arith(p, inst, A0_MUL, 2); + emit_simple_arith(p, inst, A0_MUL, 2, fs); break; case TGSI_OPCODE_POW: - src0 = src_vector(p, &inst->Src[0]); - src1 = src_vector(p, &inst->Src[1]); + src0 = src_vector(p, &inst->Src[0], fs); + src1 = src_vector(p, &inst->Src[1], fs); tmp = i915_get_utemp(p); flags = get_result_flags(inst); @@ -712,7 +727,7 @@ i915_translate_instruction(struct i915_fp_compile *p, break; case TGSI_OPCODE_RCP: - src0 = src_vector(p, &inst->Src[0]); + src0 = src_vector(p, &inst->Src[0], fs); i915_emit_arith(p, A0_RCP, @@ -722,7 +737,7 @@ i915_translate_instruction(struct i915_fp_compile *p, break; case TGSI_OPCODE_RSQ: - src0 = src_vector(p, &inst->Src[0]); + src0 = src_vector(p, &inst->Src[0], fs); i915_emit_arith(p, A0_RSQ, @@ -732,7 +747,7 @@ i915_translate_instruction(struct i915_fp_compile *p, break; case TGSI_OPCODE_SCS: - src0 = src_vector(p, &inst->Src[0]); + src0 = src_vector(p, &inst->Src[0], fs); tmp = i915_get_utemp(p); /* @@ -797,8 +812,8 @@ i915_translate_instruction(struct i915_fp_compile *p, case TGSI_OPCODE_SEQ: /* if we're both >= and <= then we're == */ - src0 = src_vector(p, &inst->Src[0]); - src1 = src_vector(p, &inst->Src[1]); + src0 = src_vector(p, &inst->Src[0], fs); + src1 = src_vector(p, &inst->Src[1], fs); tmp = i915_get_utemp(p); i915_emit_arith(p, @@ -824,11 +839,11 @@ i915_translate_instruction(struct i915_fp_compile *p, break; case TGSI_OPCODE_SGE: - emit_simple_arith(p, inst, A0_SGE, 2); + emit_simple_arith(p, inst, A0_SGE, 2, fs); break; case TGSI_OPCODE_SIN: - src0 = src_vector(p, &inst->Src[0]); + src0 = src_vector(p, &inst->Src[0], fs); tmp = i915_get_utemp(p); i915_emit_arith(p, @@ -879,22 +894,22 @@ i915_translate_instruction(struct i915_fp_compile *p, case TGSI_OPCODE_SLE: /* like SGE, but swap reg0, reg1 */ - emit_simple_arith_swap2(p, inst, A0_SGE, 2); + emit_simple_arith_swap2(p, inst, A0_SGE, 2, fs); break; case TGSI_OPCODE_SLT: - emit_simple_arith(p, inst, A0_SLT, 2); + emit_simple_arith(p, inst, A0_SLT, 2, fs); break; case TGSI_OPCODE_SGT: /* like SLT, but swap reg0, reg1 */ - emit_simple_arith_swap2(p, inst, A0_SLT, 2); + emit_simple_arith_swap2(p, inst, A0_SLT, 2, fs); break; case TGSI_OPCODE_SNE: /* if we're neither < nor > then we're != */ - src0 = src_vector(p, &inst->Src[0]); - src1 = src_vector(p, &inst->Src[1]); + src0 = src_vector(p, &inst->Src[0], fs); + src1 = src_vector(p, &inst->Src[1], fs); tmp = i915_get_utemp(p); i915_emit_arith(p, @@ -921,7 +936,7 @@ i915_translate_instruction(struct i915_fp_compile *p, case TGSI_OPCODE_SSG: /* compute (src>0) - (src<0) */ - src0 = src_vector(p, &inst->Src[0]); + src0 = src_vector(p, &inst->Src[0], fs); tmp = i915_get_utemp(p); i915_emit_arith(p, @@ -947,8 +962,8 @@ i915_translate_instruction(struct i915_fp_compile *p, break; case TGSI_OPCODE_SUB: - src0 = src_vector(p, &inst->Src[0]); - src1 = src_vector(p, &inst->Src[1]); + src0 = src_vector(p, &inst->Src[0], fs); + src1 = src_vector(p, &inst->Src[1], fs); i915_emit_arith(p, A0_ADD, @@ -958,19 +973,19 @@ i915_translate_instruction(struct i915_fp_compile *p, break; case TGSI_OPCODE_TEX: - emit_tex(p, inst, T0_TEXLD); + emit_tex(p, inst, T0_TEXLD, fs); break; case TGSI_OPCODE_TRUNC: - emit_simple_arith(p, inst, A0_TRC, 1); + emit_simple_arith(p, inst, A0_TRC, 1, fs); break; case TGSI_OPCODE_TXB: - emit_tex(p, inst, T0_TEXLDB); + emit_tex(p, inst, T0_TEXLDB, fs); break; case TGSI_OPCODE_TXP: - emit_tex(p, inst, T0_TEXLDP); + emit_tex(p, inst, T0_TEXLDP, fs); break; case TGSI_OPCODE_XPD: @@ -980,8 +995,8 @@ i915_translate_instruction(struct i915_fp_compile *p, * result.z = src0.x * src1.y - src0.y * src1.x; * result.w = undef; */ - src0 = src_vector(p, &inst->Src[0]); - src1 = src_vector(p, &inst->Src[1]); + src0 = src_vector(p, &inst->Src[0], fs); + src1 = src_vector(p, &inst->Src[1], fs); tmp = i915_get_utemp(p); i915_emit_arith(p, @@ -1016,7 +1031,8 @@ i915_translate_instruction(struct i915_fp_compile *p, */ static void i915_translate_instructions(struct i915_fp_compile *p, - const struct tgsi_token *tokens) + const struct tgsi_token *tokens, + struct i915_fragment_shader *fs) { struct i915_fragment_shader *ifs = p->shader; struct tgsi_parse_context parse; @@ -1097,7 +1113,7 @@ i915_translate_instructions(struct i915_fp_compile *p, p->first_instruction = FALSE; } - i915_translate_instruction(p, &parse.FullToken.FullInstruction); + i915_translate_instruction(p, &parse.FullToken.FullInstruction, fs); break; default: @@ -1115,6 +1131,7 @@ i915_init_compile(struct i915_context *i915, struct i915_fragment_shader *ifs) { struct i915_fp_compile *p = CALLOC_STRUCT(i915_fp_compile); + int i; p->shader = ifs; @@ -1127,6 +1144,9 @@ i915_init_compile(struct i915_context *i915, ifs->num_constants = 0; memset(ifs->constant_flags, 0, sizeof(ifs->constant_flags)); + for (i = 0; i < I915_TEX_UNITS; i++) + ifs->generic_mapping[i] = -1; + p->first_instruction = TRUE; p->nr_tex_indirect = 1; /* correct? */ @@ -1296,7 +1316,7 @@ i915_translate_fragment_program( struct i915_context *i915, p = i915_init_compile(i915, fs); i915_find_wpos_space(p); - i915_translate_instructions(p, tokens); + i915_translate_instructions(p, tokens, fs); i915_fixup_depth_write(p); i915_fini_compile(i915, p); diff --git a/src/gallium/drivers/i915/i915_state_derived.c b/src/gallium/drivers/i915/i915_state_derived.c index 47bea8bbd8b..bf6b30a4530 100644 --- a/src/gallium/drivers/i915/i915_state_derived.c +++ b/src/gallium/drivers/i915/i915_state_derived.c @@ -35,6 +35,18 @@ #include "i915_debug.h" #include "i915_reg.h" +static uint find_mapping(struct i915_fragment_shader* fs, int unit) +{ + int i; + for (i = 0; i < I915_TEX_UNITS ; i++) + { + if (fs->generic_mapping[i] == unit) + return i; + } + debug_printf("Mapping not found\n"); + return 0; +} + /*********************************************************************** @@ -46,12 +58,12 @@ static void calculate_vertex_layout(struct i915_context *i915) const struct i915_fragment_shader *fs = i915->fs; const enum interp_mode colorInterp = i915->rasterizer->color_interp; struct vertex_info vinfo; - boolean texCoords[8], colors[2], fog, needW, have_varyings; + boolean texCoords[I915_TEX_UNITS], colors[2], fog, needW; uint i; int src; memset(texCoords, 0, sizeof(texCoords)); - colors[0] = colors[1] = fog = needW = have_varyings = FALSE; + colors[0] = colors[1] = fog = needW = FALSE; memset(&vinfo, 0, sizeof(vinfo)); /* Determine which fragment program inputs are needed. Setup HW vertex @@ -67,19 +79,11 @@ static void calculate_vertex_layout(struct i915_context *i915) break; case TGSI_SEMANTIC_GENERIC: { - /* texcoords/varyings */ + /* texcoords/varyings/other generic */ /* XXX handle back/front face and point size */ uint unit = fs->info.input_semantic_index[i]; - /* Route varyings as tex coords */ - if ( (unit >= 10) && (unit < 18) ) { - have_varyings = TRUE; - unit -= 10; - } - - assert(unit < 8); - - texCoords[unit] = TRUE; + texCoords[find_mapping(fs, unit)] = TRUE; needW = TRUE; } break; @@ -130,14 +134,11 @@ static void calculate_vertex_layout(struct i915_context *i915) } /* texcoords/varyings */ - for (i = 0; i < 8; i++) { + for (i = 0; i < I915_TEX_UNITS; i++) { uint hwtc; if (texCoords[i]) { hwtc = TEXCOORDFMT_4D; - if (!have_varyings) - src = draw_find_shader_output(i915->draw, TGSI_SEMANTIC_GENERIC, i); - else - src = draw_find_shader_output(i915->draw, TGSI_SEMANTIC_GENERIC, i + 10); + src = draw_find_shader_output(i915->draw, TGSI_SEMANTIC_GENERIC, fs->generic_mapping[i]); draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); } else { -- 2.30.2