X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fstate_trackers%2Fnine%2Fnine_ff.c;h=5e756b36707d47e827d3d20b74b34f2351e7b6f5;hb=9e467d111b2c9046c9b35b9e76891a8cfbb752c1;hp=e168bf87707bc12883c84b2e495ac22491622a0c;hpb=66ad5b1592bb050269216bc64aa7916056cca466;p=mesa.git diff --git a/src/gallium/state_trackers/nine/nine_ff.c b/src/gallium/state_trackers/nine/nine_ff.c index e168bf87707..5e756b36707 100644 --- a/src/gallium/state_trackers/nine/nine_ff.c +++ b/src/gallium/state_trackers/nine/nine_ff.c @@ -171,8 +171,7 @@ static void nine_ff_prune_ps(struct NineDevice9 *); static void nine_ureg_tgsi_dump(struct ureg_program *ureg, boolean override) { if (debug_get_bool_option("NINE_FF_DUMP", FALSE) || override) { - unsigned count; - const struct tgsi_token *toks = ureg_get_tokens(ureg, &count); + const struct tgsi_token *toks = ureg_get_tokens(ureg, NULL); tgsi_dump(toks, 0); ureg_free_tokens(toks); } @@ -421,7 +420,7 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs) oCol[0] = ureg_saturate(ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0)); oCol[1] = ureg_saturate(ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 1)); if (key->fog || key->passthrough & (1 << NINE_DECLUSAGE_FOG)) { - oFog = ureg_DECL_output(ureg, TGSI_SEMANTIC_FOG, 0); + oFog = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 16); oFog = ureg_writemask(oFog, TGSI_WRITEMASK_X); } @@ -449,9 +448,9 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs) * position at the end.*/ ureg_MOV(ureg, tmp, vs->aVtx); /* X from [X_min, X_min + width] to [-1, 1], same for Y. Z to [0, 1] */ - ureg_SUB(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(tmp), _CONST(101)); + ureg_ADD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(tmp), ureg_negate(_CONST(101))); ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(tmp), _CONST(100)); - ureg_SUB(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XY), ureg_src(tmp), ureg_imm1f(ureg, 1.0f)); + ureg_ADD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XY), ureg_src(tmp), ureg_imm1f(ureg, -1.0f)); /* Y needs to be reversed */ ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_negate(ureg_src(tmp))); /* inverse rhw */ @@ -484,7 +483,7 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs) for (i = 0; i < key->vertexblend; ++i) { for (c = 0; c < 4; ++c) { - cWM[c] = ureg_src_register(TGSI_FILE_CONSTANT, (160 + i * 4) * !key->vertexblend_indexed + c); + cWM[c] = ureg_src_dimension(ureg_src_register(TGSI_FILE_CONSTANT, (160 + i * 4) * !key->vertexblend_indexed + c), 0); if (key->vertexblend_indexed) cWM[c] = ureg_src_indirect(cWM[c], ureg_scalar(ureg_src(AR), i)); } @@ -511,7 +510,7 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs) if (has_aNrm) ureg_MAD(ureg, aNrm_dst, ureg_src(tmp2), ureg_scalar(vs->aWgt, i), ureg_src(aNrm_dst)); /* subtract weighted position value for last value */ - ureg_SUB(ureg, sum_blendweights, ureg_src(sum_blendweights), ureg_scalar(vs->aWgt, i)); + ureg_ADD(ureg, sum_blendweights, ureg_src(sum_blendweights), ureg_negate(ureg_scalar(vs->aWgt, i))); } } @@ -661,7 +660,7 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs) ureg_DP3(ureg, tmp_x, ureg_src(aVtx_normed), vs->aNrm); ureg_MUL(ureg, tmp, vs->aNrm, _X(tmp)); ureg_ADD(ureg, tmp, ureg_src(tmp), ureg_src(tmp)); - ureg_SUB(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_XYZ), ureg_src(aVtx_normed), ureg_src(tmp)); + ureg_ADD(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_XYZ), ureg_src(aVtx_normed), ureg_negate(ureg_src(tmp))); ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f)); ureg_release_temporary(ureg, aVtx_normed); dim_input = 4; @@ -676,7 +675,7 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs) ureg_DP3(ureg, tmp_x, ureg_src(aVtx_normed), vs->aNrm); ureg_MUL(ureg, tmp, vs->aNrm, _X(tmp)); ureg_ADD(ureg, tmp, ureg_src(tmp), ureg_src(tmp)); - ureg_SUB(ureg, tmp, ureg_src(aVtx_normed), ureg_src(tmp)); + ureg_ADD(ureg, tmp, ureg_src(aVtx_normed), ureg_negate(ureg_src(tmp))); /* now tmp = normed(Vtx) - 2 dot3(normed(Vtx), Nrm) Nrm */ ureg_MOV(ureg, ureg_writemask(tmp2, TGSI_WRITEMASK_XYZ), ureg_src(tmp)); ureg_MUL(ureg, tmp2, ureg_src(tmp2), ureg_src(tmp2)); @@ -811,6 +810,10 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs) const unsigned loop_label = l++; + /* Declare all light constants to allow indirect adressing */ + for (i = 32; i < 96; i++) + ureg_DECL_constant(ureg, i); + ureg_MOV(ureg, rCtr, ureg_imm1f(ureg, 32.0f)); /* &lightconst(0) */ ureg_MOV(ureg, rD, ureg_imm1f(ureg, 0.0f)); ureg_MOV(ureg, rA, ureg_imm1f(ureg, 0.0f)); @@ -829,7 +832,7 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs) /* hitDir = light.position - eyeVtx * d = length(hitDir) */ - ureg_SUB(ureg, rHit, cLPos, vs->aVtx); + ureg_ADD(ureg, rHit, cLPos, ureg_negate(vs->aVtx)); ureg_DP3(ureg, tmp_x, ureg_src(rHit), ureg_src(rHit)); ureg_RSQ(ureg, tmp_y, _X(tmp)); ureg_MUL(ureg, tmp_x, _X(tmp), _Y(tmp)); /* length */ @@ -863,7 +866,7 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs) * spotAtt = (rho - light.cphi2) / (light.ctht2 - light.cphi2) ^ light.falloff */ ureg_DP3(ureg, tmp_y, ureg_negate(ureg_src(rHit)), cLDir); /* rho */ - ureg_SUB(ureg, tmp_x, _Y(tmp), cLPhi); + ureg_ADD(ureg, tmp_x, _Y(tmp), ureg_negate(cLPhi)); ureg_MUL(ureg, tmp_x, _X(tmp), cLSDiv); ureg_POW(ureg, tmp_x, _X(tmp), cLFOff); /* spotAtten */ ureg_SGE(ureg, tmp_z, _Y(tmp), cLTht); /* if inside theta && phi */ @@ -879,9 +882,9 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs) if (has_aNrm) { if (key->localviewer) { ureg_normalize3(ureg, rMid, vs->aVtx); - ureg_SUB(ureg, rMid, ureg_src(rHit), ureg_src(rMid)); + ureg_ADD(ureg, rMid, ureg_src(rHit), ureg_negate(ureg_src(rMid))); } else { - ureg_SUB(ureg, rMid, ureg_src(rHit), ureg_imm3f(ureg, 0.0f, 0.0f, 1.0f)); + ureg_ADD(ureg, rMid, ureg_src(rHit), ureg_imm3f(ureg, 0.0f, 0.0f, -1.0f)); } ureg_normalize3(ureg, rMid, ureg_src(rMid)); ureg_DP3(ureg, ureg_saturate(tmp_x), vs->aNrm, ureg_src(rHit)); @@ -984,7 +987,7 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs) ureg_EX2(ureg, tmp_x, _X(tmp)); } else if (key->fog_mode == D3DFOG_LINEAR) { - ureg_SUB(ureg, tmp_x, _XXXX(_CONST(28)), _Z(tmp)); + ureg_ADD(ureg, tmp_x, _XXXX(_CONST(28)), ureg_negate(_Z(tmp))); ureg_MUL(ureg, ureg_saturate(tmp_x), _X(tmp), _YYYY(_CONST(28))); } ureg_MOV(ureg, oFog, _X(tmp)); @@ -997,35 +1000,35 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs) struct ureg_src input; struct ureg_dst output; input = vs->aWgt; - output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 18); + output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 19); ureg_MOV(ureg, output, input); } if (key->passthrough & (1 << NINE_DECLUSAGE_BLENDINDICES)) { struct ureg_src input; struct ureg_dst output; input = vs->aInd; - output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 19); + output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 20); ureg_MOV(ureg, output, input); } if (key->passthrough & (1 << NINE_DECLUSAGE_NORMAL)) { struct ureg_src input; struct ureg_dst output; input = vs->aNrm; - output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 20); + output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 21); ureg_MOV(ureg, output, input); } if (key->passthrough & (1 << NINE_DECLUSAGE_TANGENT)) { struct ureg_src input; struct ureg_dst output; input = build_vs_add_input(vs, NINE_DECLUSAGE_TANGENT); - output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 21); + output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 22); ureg_MOV(ureg, output, input); } if (key->passthrough & (1 << NINE_DECLUSAGE_BINORMAL)) { struct ureg_src input; struct ureg_dst output; input = build_vs_add_input(vs, NINE_DECLUSAGE_BINORMAL); - output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 22); + output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 23); ureg_MOV(ureg, output, input); } if (key->passthrough & (1 << NINE_DECLUSAGE_FOG)) { @@ -1057,7 +1060,7 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs) ureg_END(ureg); nine_ureg_tgsi_dump(ureg, FALSE); - return ureg_create_shader_and_destroy(ureg, device->pipe); + return nine_create_shader_with_so_and_destroy(ureg, device->context.pipe, NULL); } /* PS FF constants layout: @@ -1132,7 +1135,7 @@ ps_get_ts_arg(struct ps_build_ctx *ps, unsigned ta) } if (ta & D3DTA_COMPLEMENT) { struct ureg_dst dst = ureg_DECL_temporary(ps->ureg); - ureg_SUB(ps->ureg, dst, ureg_imm1f(ps->ureg, 1.0f), reg); + ureg_ADD(ps->ureg, dst, ureg_imm1f(ps->ureg, 1.0f), ureg_negate(reg)); reg = ureg_src(dst); } if (ta & D3DTA_ALPHAREPLICATE) @@ -1235,17 +1238,17 @@ ps_do_ts_op(struct ps_build_ctx *ps, unsigned top, struct ureg_dst dst, struct u break; case D3DTOP_ADDSIGNED: ureg_ADD(ureg, tmp, arg[1], arg[2]); - ureg_SUB(ureg, dst, ureg_src(tmp), ureg_imm1f(ureg, 0.5f)); + ureg_ADD(ureg, dst, ureg_src(tmp), ureg_imm1f(ureg, -0.5f)); break; case D3DTOP_ADDSIGNED2X: ureg_ADD(ureg, tmp, arg[1], arg[2]); ureg_MAD(ureg, dst, ureg_src(tmp), ureg_imm1f(ureg, 2.0f), ureg_imm1f(ureg, -1.0f)); break; case D3DTOP_SUBTRACT: - ureg_SUB(ureg, dst, arg[1], arg[2]); + ureg_ADD(ureg, dst, arg[1], ureg_negate(arg[2])); break; case D3DTOP_ADDSMOOTH: - ureg_SUB(ureg, tmp, ureg_imm1f(ureg, 1.0f), arg[1]); + ureg_ADD(ureg, tmp, ureg_imm1f(ureg, 1.0f), ureg_negate(arg[1])); ureg_MAD(ureg, dst, ureg_src(tmp), arg[2], arg[1]); break; case D3DTOP_BLENDDIFFUSEALPHA: @@ -1259,7 +1262,7 @@ ps_do_ts_op(struct ps_build_ctx *ps, unsigned top, struct ureg_dst dst, struct u ureg_LRP(ureg, dst, _WWWW(_CONST(20)), arg[1], arg[2]); break; case D3DTOP_BLENDTEXTUREALPHAPM: - ureg_SUB(ureg, tmp_x, ureg_imm1f(ureg, 1.0f), _W(ps->rTex)); + ureg_ADD(ureg, tmp_x, ureg_imm1f(ureg, 1.0f), ureg_negate(_W(ps->rTex))); ureg_MAD(ureg, dst, arg[2], _X(tmp), arg[1]); break; case D3DTOP_BLENDCURRENTALPHA: @@ -1276,11 +1279,11 @@ ps_do_ts_op(struct ps_build_ctx *ps, unsigned top, struct ureg_dst dst, struct u ureg_MAD(ureg, dst, arg[1], arg[2], _WWWW(arg[1])); break; case D3DTOP_MODULATEINVALPHA_ADDCOLOR: - ureg_SUB(ureg, tmp_x, ureg_imm1f(ureg, 1.0f), _WWWW(arg[1])); + ureg_ADD(ureg, tmp_x, ureg_imm1f(ureg, 1.0f), ureg_negate(_WWWW(arg[1]))); ureg_MAD(ureg, dst, _X(tmp), arg[2], arg[1]); break; case D3DTOP_MODULATEINVCOLOR_ADDALPHA: - ureg_SUB(ureg, tmp, ureg_imm1f(ureg, 1.0f), arg[1]); + ureg_ADD(ureg, tmp, ureg_imm1f(ureg, 1.0f), ureg_negate(arg[1])); ureg_MAD(ureg, dst, ureg_src(tmp), arg[2], _WWWW(arg[1])); break; case D3DTOP_BUMPENVMAP: @@ -1288,8 +1291,8 @@ ps_do_ts_op(struct ps_build_ctx *ps, unsigned top, struct ureg_dst dst, struct u case D3DTOP_BUMPENVMAPLUMINANCE: break; case D3DTOP_DOTPRODUCT3: - ureg_SUB(ureg, tmp, arg[1], ureg_imm4f(ureg,0.5,0.5,0.5,0.5)); - ureg_SUB(ureg, tmp2, arg[2] , ureg_imm4f(ureg,0.5,0.5,0.5,0.5)); + ureg_ADD(ureg, tmp, arg[1], ureg_imm4f(ureg,-0.5,-0.5,-0.5,-0.5)); + ureg_ADD(ureg, tmp2, arg[2] , ureg_imm4f(ureg,-0.5,-0.5,-0.5,-0.5)); ureg_DP3(ureg, tmp, ureg_src(tmp), ureg_src(tmp2)); ureg_MUL(ureg, ureg_saturate(dst), ureg_src(tmp), ureg_imm4f(ureg,4.0,4.0,4.0,4.0)); break; @@ -1460,9 +1463,9 @@ nine_ff_build_ps(struct NineDevice9 *device, struct nine_ff_ps_key *key) ureg_MUL(ureg, ps.rMod, ps.rCurSrc, ps.rTexSrc); } - colorarg[0] = (key->ts[s].colorarg0 | ((key->colorarg_b4[0] >> s) << 4) | ((key->colorarg_b5[0] >> s) << 5)) & 0x3f; - colorarg[1] = (key->ts[s].colorarg1 | ((key->colorarg_b4[1] >> s) << 4) | ((key->colorarg_b5[1] >> s) << 5)) & 0x3f; - colorarg[2] = (key->ts[s].colorarg2 | ((key->colorarg_b4[2] >> s) << 4) | ((key->colorarg_b5[2] >> s) << 5)) & 0x3f; + colorarg[0] = (key->ts[s].colorarg0 | (((key->colorarg_b4[0] >> s) & 0x1) << 4) | ((key->colorarg_b5[0] >> s) << 5)) & 0x3f; + colorarg[1] = (key->ts[s].colorarg1 | (((key->colorarg_b4[1] >> s) & 0x1) << 4) | ((key->colorarg_b5[1] >> s) << 5)) & 0x3f; + colorarg[2] = (key->ts[s].colorarg2 | (((key->colorarg_b4[2] >> s) & 0x1) << 4) | ((key->colorarg_b5[2] >> s) << 5)) & 0x3f; alphaarg[0] = (key->ts[s].alphaarg0 | ((key->alphaarg_b4[0] >> s) << 4)) & 0x1f; alphaarg[1] = (key->ts[s].alphaarg1 | ((key->alphaarg_b4[1] >> s) << 4)) & 0x1f; alphaarg[2] = (key->ts[s].alphaarg2 | ((key->alphaarg_b4[2] >> s) << 4)) & 0x1f; @@ -1536,14 +1539,14 @@ nine_ff_build_ps(struct NineDevice9 *device, struct nine_ff_ps_key *key) ureg_EX2(ureg, rFog, _X(rFog)); } else if (key->fog_mode == D3DFOG_LINEAR) { - ureg_SUB(ureg, rFog, _XXXX(_CONST(22)), _X(rFog)); + ureg_ADD(ureg, rFog, _XXXX(_CONST(22)), ureg_negate(_X(rFog))); ureg_MUL(ureg, ureg_saturate(rFog), _X(rFog), _YYYY(_CONST(22))); } ureg_LRP(ureg, ureg_writemask(oCol, TGSI_WRITEMASK_XYZ), _X(rFog), ps.rCurSrc, _CONST(21)); ureg_MOV(ureg, ureg_writemask(oCol, TGSI_WRITEMASK_W), ps.rCurSrc); } else if (key->fog) { - struct ureg_src vFog = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_FOG, 0, TGSI_INTERPOLATE_PERSPECTIVE); + struct ureg_src vFog = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_GENERIC, 16, TGSI_INTERPOLATE_PERSPECTIVE); ureg_LRP(ureg, ureg_writemask(oCol, TGSI_WRITEMASK_XYZ), _XXXX(vFog), ps.rCurSrc, _CONST(21)); ureg_MOV(ureg, ureg_writemask(oCol, TGSI_WRITEMASK_W), ps.rCurSrc); } else { @@ -1552,7 +1555,7 @@ nine_ff_build_ps(struct NineDevice9 *device, struct nine_ff_ps_key *key) ureg_END(ureg); nine_ureg_tgsi_dump(ureg, FALSE); - return ureg_create_shader_and_destroy(ureg, device->pipe); + return nine_create_shader_with_so_and_destroy(ureg, device->context.pipe, NULL); } static struct NineVertexShader9 * @@ -1680,6 +1683,7 @@ nine_ff_get_vs(struct NineDevice9 *device) key.tc_dim_output |= dim << (s * 3); } + DBG("VS ff key hash: %x\n", nine_ff_vs_key_hash(&key)); vs = util_hash_table_get(device->ff.ht_vs, &key); if (vs) return vs; @@ -1695,7 +1699,6 @@ nine_ff_get_vs(struct NineDevice9 *device) (void)err; assert(err == PIPE_OK); device->ff.num_vs++; - NineUnknown_ConvertRefToBind(NineUnknown(vs)); vs->num_inputs = bld.num_inputs; for (n = 0; n < bld.num_inputs; ++n) @@ -1737,7 +1740,7 @@ nine_ff_get_ps(struct NineDevice9 *device) break; } - if (!context->texture[s] && + if (!context->texture[s].enabled && ((context->ff.tex_stage[s][D3DTSS_COLORARG0] == D3DTA_TEXTURE && used_c & 0x1) || (context->ff.tex_stage[s][D3DTSS_COLORARG1] == D3DTA_TEXTURE && @@ -1762,28 +1765,28 @@ nine_ff_get_ps(struct NineDevice9 *device) sampler_mask |= (1 << s); if (key.ts[s].colorop != D3DTOP_DISABLE) { - if (used_c & 0x1) key.ts[s].colorarg0 = context->ff.tex_stage[s][D3DTSS_COLORARG0]; - if (used_c & 0x2) key.ts[s].colorarg1 = context->ff.tex_stage[s][D3DTSS_COLORARG1]; - if (used_c & 0x4) key.ts[s].colorarg2 = context->ff.tex_stage[s][D3DTSS_COLORARG2]; - if (used_c & 0x1) key.colorarg_b4[0] |= (context->ff.tex_stage[s][D3DTSS_COLORARG0] >> 4) << s; - if (used_c & 0x1) key.colorarg_b5[0] |= (context->ff.tex_stage[s][D3DTSS_COLORARG0] >> 5) << s; - if (used_c & 0x2) key.colorarg_b4[1] |= (context->ff.tex_stage[s][D3DTSS_COLORARG1] >> 4) << s; - if (used_c & 0x2) key.colorarg_b5[1] |= (context->ff.tex_stage[s][D3DTSS_COLORARG1] >> 5) << s; - if (used_c & 0x4) key.colorarg_b4[2] |= (context->ff.tex_stage[s][D3DTSS_COLORARG2] >> 4) << s; - if (used_c & 0x4) key.colorarg_b5[2] |= (context->ff.tex_stage[s][D3DTSS_COLORARG2] >> 5) << s; + if (used_c & 0x1) key.ts[s].colorarg0 = context->ff.tex_stage[s][D3DTSS_COLORARG0] & 0x7; + if (used_c & 0x2) key.ts[s].colorarg1 = context->ff.tex_stage[s][D3DTSS_COLORARG1] & 0x7; + if (used_c & 0x4) key.ts[s].colorarg2 = context->ff.tex_stage[s][D3DTSS_COLORARG2] & 0x7; + if (used_c & 0x1) key.colorarg_b4[0] |= ((context->ff.tex_stage[s][D3DTSS_COLORARG0] >> 4) & 0x1) << s; + if (used_c & 0x1) key.colorarg_b5[0] |= ((context->ff.tex_stage[s][D3DTSS_COLORARG0] >> 5) & 0x1) << s; + if (used_c & 0x2) key.colorarg_b4[1] |= ((context->ff.tex_stage[s][D3DTSS_COLORARG1] >> 4) & 0x1) << s; + if (used_c & 0x2) key.colorarg_b5[1] |= ((context->ff.tex_stage[s][D3DTSS_COLORARG1] >> 5) & 0x1) << s; + if (used_c & 0x4) key.colorarg_b4[2] |= ((context->ff.tex_stage[s][D3DTSS_COLORARG2] >> 4) & 0x1) << s; + if (used_c & 0x4) key.colorarg_b5[2] |= ((context->ff.tex_stage[s][D3DTSS_COLORARG2] >> 5) & 0x1) << s; } if (key.ts[s].alphaop != D3DTOP_DISABLE) { - if (used_a & 0x1) key.ts[s].alphaarg0 = context->ff.tex_stage[s][D3DTSS_ALPHAARG0]; - if (used_a & 0x2) key.ts[s].alphaarg1 = context->ff.tex_stage[s][D3DTSS_ALPHAARG1]; - if (used_a & 0x4) key.ts[s].alphaarg2 = context->ff.tex_stage[s][D3DTSS_ALPHAARG2]; - if (used_a & 0x1) key.alphaarg_b4[0] |= (context->ff.tex_stage[s][D3DTSS_ALPHAARG0] >> 4) << s; - if (used_a & 0x2) key.alphaarg_b4[1] |= (context->ff.tex_stage[s][D3DTSS_ALPHAARG1] >> 4) << s; - if (used_a & 0x4) key.alphaarg_b4[2] |= (context->ff.tex_stage[s][D3DTSS_ALPHAARG2] >> 4) << s; + if (used_a & 0x1) key.ts[s].alphaarg0 = context->ff.tex_stage[s][D3DTSS_ALPHAARG0] & 0x7; + if (used_a & 0x2) key.ts[s].alphaarg1 = context->ff.tex_stage[s][D3DTSS_ALPHAARG1] & 0x7; + if (used_a & 0x4) key.ts[s].alphaarg2 = context->ff.tex_stage[s][D3DTSS_ALPHAARG2] & 0x7; + if (used_a & 0x1) key.alphaarg_b4[0] |= ((context->ff.tex_stage[s][D3DTSS_ALPHAARG0] >> 4) & 0x1) << s; + if (used_a & 0x2) key.alphaarg_b4[1] |= ((context->ff.tex_stage[s][D3DTSS_ALPHAARG1] >> 4) & 0x1) << s; + if (used_a & 0x4) key.alphaarg_b4[2] |= ((context->ff.tex_stage[s][D3DTSS_ALPHAARG2] >> 4) & 0x1) << s; } key.ts[s].resultarg = context->ff.tex_stage[s][D3DTSS_RESULTARG] == D3DTA_TEMP; - if (context->texture[s]) { - switch (context->texture[s]->base.type) { + if (context->texture[s].enabled) { + switch (context->texture[s].type) { case D3DRTYPE_TEXTURE: key.ts[s].textarget = 1; break; case D3DRTYPE_VOLUMETEXTURE: key.ts[s].textarget = 2; break; case D3DRTYPE_CUBETEXTURE: key.ts[s].textarget = 3; break; @@ -1816,7 +1819,7 @@ nine_ff_get_ps(struct NineDevice9 *device) if (s >= 1) key.ts[s-1].resultarg = 0; - key.projected = nine_ff_get_projected_key(context); + key.projected = nine_ff_get_projected_key_ff(context); key.specular = !!context->rs[D3DRS_SPECULARENABLE]; for (; s < 8; ++s) @@ -1834,6 +1837,7 @@ nine_ff_get_ps(struct NineDevice9 *device) !(projection_matrix->_34 == 0.0f && projection_matrix->_44 == 1.0f); + DBG("PS ff key hash: %x\n", nine_ff_ps_key_hash(&key)); ps = util_hash_table_get(device->ff.ht_ps, &key); if (ps) return ps; @@ -1847,7 +1851,6 @@ nine_ff_get_ps(struct NineDevice9 *device) (void)err; assert(err == PIPE_OK); device->ff.num_ps++; - NineUnknown_ConvertRefToBind(NineUnknown(ps)); ps->rt_mask = 0x1; ps->sampler_mask = sampler_mask; @@ -1936,7 +1939,7 @@ nine_ff_load_lights(struct NineDevice9 *device) dst[38 + l * 8].x = cosf(light->Theta * 0.5f); dst[38 + l * 8].y = cosf(light->Phi * 0.5f); dst[38 + l * 8].z = 1.0f / (dst[38 + l * 8].x - dst[38 + l * 8].y); - dst[39 + l * 8].w = (l + 1) == context->ff.num_lights_active; + dst[39 + l * 8].w = (float)((l + 1) == context->ff.num_lights_active); } } @@ -1946,7 +1949,7 @@ nine_ff_load_point_and_fog_params(struct NineDevice9 *device) struct nine_context *context = &device->context; struct fvec4 *dst = (struct fvec4 *)device->ff.vs_const; - if (!(context->changed.group & NINE_STATE_FF_OTHER)) + if (!(context->changed.group & NINE_STATE_FF_VS_OTHER)) return; dst[26].x = asfloat(context->rs[D3DRS_POINTSIZE_MIN]); dst[26].y = asfloat(context->rs[D3DRS_POINTSIZE_MAX]); @@ -1983,7 +1986,7 @@ nine_ff_load_ps_params(struct NineDevice9 *device) struct fvec4 *dst = (struct fvec4 *)device->ff.ps_const; unsigned s; - if (!(context->changed.group & (NINE_STATE_FF_PSSTAGES | NINE_STATE_FF_OTHER))) + if (!(context->changed.group & NINE_STATE_FF_PS_CONSTS)) return; for (s = 0; s < 8; ++s) @@ -2061,20 +2064,10 @@ nine_ff_update(struct NineDevice9 *device) cb.user_buffer = device->ff.vs_const; cb.buffer_size = NINE_FF_NUM_VS_CONST * 4 * sizeof(float); - if (!device->driver_caps.user_cbufs) { - context->pipe_data.cb_vs_ff.buffer_size = cb.buffer_size; - u_upload_data(device->constbuf_uploader, - 0, - cb.buffer_size, - device->constbuf_alignment, - cb.user_buffer, - &context->pipe_data.cb_vs_ff.buffer_offset, - &context->pipe_data.cb_vs_ff.buffer); - u_upload_unmap(device->constbuf_uploader); - context->pipe_data.cb_vs_ff.user_buffer = NULL; - } else - context->pipe_data.cb_vs_ff = cb; + context->pipe_data.cb_vs_ff = cb; context->commit |= NINE_STATE_COMMIT_CONST_VS; + + context->changed.group &= ~NINE_STATE_FF_VS; } if (!context->ps) { @@ -2085,23 +2078,11 @@ nine_ff_update(struct NineDevice9 *device) cb.user_buffer = device->ff.ps_const; cb.buffer_size = NINE_FF_NUM_PS_CONST * 4 * sizeof(float); - if (!device->driver_caps.user_cbufs) { - context->pipe_data.cb_ps_ff.buffer_size = cb.buffer_size; - u_upload_data(device->constbuf_uploader, - 0, - cb.buffer_size, - device->constbuf_alignment, - cb.user_buffer, - &context->pipe_data.cb_ps_ff.buffer_offset, - &context->pipe_data.cb_ps_ff.buffer); - u_upload_unmap(device->constbuf_uploader); - context->pipe_data.cb_ps_ff.user_buffer = NULL; - } else - context->pipe_data.cb_ps_ff = cb; + context->pipe_data.cb_ps_ff = cb; context->commit |= NINE_STATE_COMMIT_CONST_PS; - } - context->changed.group &= ~NINE_STATE_FF; + context->changed.group &= ~NINE_STATE_FF_PS; + } } @@ -2157,9 +2138,9 @@ nine_ff_prune_vs(struct NineDevice9 *device) { struct nine_context *context = &device->context; - if (device->ff.num_vs > 100) { + if (device->ff.num_vs > 1024) { /* could destroy the bound one here, so unbind */ - device->pipe->bind_vs_state(device->pipe, NULL); + context->pipe->bind_vs_state(context->pipe, NULL); util_hash_table_foreach(device->ff.ht_vs, nine_ff_ht_delete_cb, NULL); util_hash_table_clear(device->ff.ht_vs); device->ff.num_vs = 0; @@ -2171,9 +2152,9 @@ nine_ff_prune_ps(struct NineDevice9 *device) { struct nine_context *context = &device->context; - if (device->ff.num_ps > 100) { + if (device->ff.num_ps > 1024) { /* could destroy the bound one here, so unbind */ - device->pipe->bind_fs_state(device->pipe, NULL); + context->pipe->bind_fs_state(context->pipe, NULL); util_hash_table_foreach(device->ff.ht_ps, nine_ff_ht_delete_cb, NULL); util_hash_table_clear(device->ff.ht_ps); device->ff.num_ps = 0; @@ -2499,7 +2480,7 @@ nine_d3d_matrix_inverse(D3DMATRIX *D, const D3DMATRIX *M) M->m[2][0] * D->m[0][2] + M->m[3][0] * D->m[0][3]; - if (det < 1e-30) {/* non inversible */ + if (fabsf(det) < 1e-30) {/* non inversible */ *D = *M; /* wine tests */ return; } @@ -2510,7 +2491,7 @@ nine_d3d_matrix_inverse(D3DMATRIX *D, const D3DMATRIX *M) for (k = 0; k < 4; k++) D->m[i][k] *= det; -#ifdef DEBUG +#if defined(DEBUG) || !defined(NDEBUG) { D3DMATRIX I;