st/nine: Fix volumetexture dtor on ctor failure
[mesa.git] / src / gallium / state_trackers / nine / nine_ff.c
index 0c92bd13e8b67be9fc9d1bd30c6aea2b870cd6b2..261be276ad890eb98f1228c772c16a1767937292 100644 (file)
@@ -171,8 +171,7 @@ static void nine_ff_prune_ps(struct NineDevice9 *);
 static void nine_ureg_tgsi_dump(struct ureg_program *ureg, boolean override)
 {
     if (debug_get_bool_option("NINE_FF_DUMP", FALSE) || override) {
-        unsigned count;
-        const struct tgsi_token *toks = ureg_get_tokens(ureg, &count);
+        const struct tgsi_token *toks = ureg_get_tokens(ureg, NULL);
         tgsi_dump(toks, 0);
         ureg_free_tokens(toks);
     }
@@ -449,9 +448,9 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs)
             * position at the end.*/
             ureg_MOV(ureg, tmp, vs->aVtx);
             /* X from [X_min, X_min + width] to [-1, 1], same for Y. Z to [0, 1] */
-            ureg_SUB(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(tmp), _CONST(101));
+            ureg_ADD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(tmp), ureg_negate(_CONST(101)));
             ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(tmp), _CONST(100));
-            ureg_SUB(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XY), ureg_src(tmp), ureg_imm1f(ureg, 1.0f));
+            ureg_ADD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XY), ureg_src(tmp), ureg_imm1f(ureg, -1.0f));
             /* Y needs to be reversed */
             ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_negate(ureg_src(tmp)));
             /* inverse rhw */
@@ -484,7 +483,7 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs)
 
         for (i = 0; i < key->vertexblend; ++i) {
             for (c = 0; c < 4; ++c) {
-                cWM[c] = ureg_src_register(TGSI_FILE_CONSTANT, (160 + i * 4) * !key->vertexblend_indexed + c);
+                cWM[c] = ureg_src_dimension(ureg_src_register(TGSI_FILE_CONSTANT, (160 + i * 4) * !key->vertexblend_indexed + c), 0);
                 if (key->vertexblend_indexed)
                     cWM[c] = ureg_src_indirect(cWM[c], ureg_scalar(ureg_src(AR), i));
             }
@@ -511,7 +510,7 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs)
                 if (has_aNrm)
                     ureg_MAD(ureg, aNrm_dst, ureg_src(tmp2), ureg_scalar(vs->aWgt, i), ureg_src(aNrm_dst));
                 /* subtract weighted position value for last value */
-                ureg_SUB(ureg, sum_blendweights, ureg_src(sum_blendweights), ureg_scalar(vs->aWgt, i));
+                ureg_ADD(ureg, sum_blendweights, ureg_src(sum_blendweights), ureg_negate(ureg_scalar(vs->aWgt, i)));
             }
         }
 
@@ -661,7 +660,7 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs)
             ureg_DP3(ureg, tmp_x, ureg_src(aVtx_normed), vs->aNrm);
             ureg_MUL(ureg, tmp, vs->aNrm, _X(tmp));
             ureg_ADD(ureg, tmp, ureg_src(tmp), ureg_src(tmp));
-            ureg_SUB(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_XYZ), ureg_src(aVtx_normed), ureg_src(tmp));
+            ureg_ADD(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_XYZ), ureg_src(aVtx_normed), ureg_negate(ureg_src(tmp)));
             ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f));
             ureg_release_temporary(ureg, aVtx_normed);
             dim_input = 4;
@@ -676,7 +675,7 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs)
             ureg_DP3(ureg, tmp_x, ureg_src(aVtx_normed), vs->aNrm);
             ureg_MUL(ureg, tmp, vs->aNrm, _X(tmp));
             ureg_ADD(ureg, tmp, ureg_src(tmp), ureg_src(tmp));
-            ureg_SUB(ureg, tmp, ureg_src(aVtx_normed), ureg_src(tmp));
+            ureg_ADD(ureg, tmp, ureg_src(aVtx_normed), ureg_negate(ureg_src(tmp)));
             /* now tmp = normed(Vtx) - 2 dot3(normed(Vtx), Nrm) Nrm */
             ureg_MOV(ureg, ureg_writemask(tmp2, TGSI_WRITEMASK_XYZ), ureg_src(tmp));
             ureg_MUL(ureg, tmp2, ureg_src(tmp2), ureg_src(tmp2));
@@ -811,6 +810,10 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs)
 
         const unsigned loop_label = l++;
 
+        /* Declare all light constants to allow indirect addressing */
+        for (i = 32; i < 96; i++)
+            ureg_DECL_constant(ureg, i);
+
         ureg_MOV(ureg, rCtr, ureg_imm1f(ureg, 32.0f)); /* &lightconst(0) */
         ureg_MOV(ureg, rD, ureg_imm1f(ureg, 0.0f));
         ureg_MOV(ureg, rA, ureg_imm1f(ureg, 0.0f));
@@ -829,7 +832,7 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs)
             /* hitDir = light.position - eyeVtx
              * d = length(hitDir)
              */
-            ureg_SUB(ureg, rHit, cLPos, vs->aVtx);
+            ureg_ADD(ureg, rHit, cLPos, ureg_negate(vs->aVtx));
             ureg_DP3(ureg, tmp_x, ureg_src(rHit), ureg_src(rHit));
             ureg_RSQ(ureg, tmp_y, _X(tmp));
             ureg_MUL(ureg, tmp_x, _X(tmp), _Y(tmp)); /* length */
@@ -863,7 +866,7 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs)
              *     spotAtt = (rho - light.cphi2) / (light.ctht2 - light.cphi2) ^ light.falloff
              */
             ureg_DP3(ureg, tmp_y, ureg_negate(ureg_src(rHit)), cLDir); /* rho */
-            ureg_SUB(ureg, tmp_x, _Y(tmp), cLPhi);
+            ureg_ADD(ureg, tmp_x, _Y(tmp), ureg_negate(cLPhi));
             ureg_MUL(ureg, tmp_x, _X(tmp), cLSDiv);
             ureg_POW(ureg, tmp_x, _X(tmp), cLFOff); /* spotAtten */
             ureg_SGE(ureg, tmp_z, _Y(tmp), cLTht); /* if inside theta && phi */
@@ -879,9 +882,9 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs)
         if (has_aNrm) {
             if (key->localviewer) {
                 ureg_normalize3(ureg, rMid, vs->aVtx);
-                ureg_SUB(ureg, rMid, ureg_src(rHit), ureg_src(rMid));
+                ureg_ADD(ureg, rMid, ureg_src(rHit), ureg_negate(ureg_src(rMid)));
             } else {
-                ureg_SUB(ureg, rMid, ureg_src(rHit), ureg_imm3f(ureg, 0.0f, 0.0f, 1.0f));
+                ureg_ADD(ureg, rMid, ureg_src(rHit), ureg_imm3f(ureg, 0.0f, 0.0f, -1.0f));
             }
             ureg_normalize3(ureg, rMid, ureg_src(rMid));
             ureg_DP3(ureg, ureg_saturate(tmp_x), vs->aNrm, ureg_src(rHit));
@@ -984,7 +987,7 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs)
             ureg_EX2(ureg, tmp_x, _X(tmp));
         } else
         if (key->fog_mode == D3DFOG_LINEAR) {
-            ureg_SUB(ureg, tmp_x, _XXXX(_CONST(28)), _Z(tmp));
+            ureg_ADD(ureg, tmp_x, _XXXX(_CONST(28)), ureg_negate(_Z(tmp)));
             ureg_MUL(ureg, ureg_saturate(tmp_x), _X(tmp), _YYYY(_CONST(28)));
         }
         ureg_MOV(ureg, oFog, _X(tmp));
@@ -1057,7 +1060,7 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs)
 
     ureg_END(ureg);
     nine_ureg_tgsi_dump(ureg, FALSE);
-    return ureg_create_shader_and_destroy(ureg, device->pipe);
+    return ureg_create_shader_and_destroy(ureg, device->context.pipe);
 }
 
 /* PS FF constants layout:
@@ -1132,7 +1135,7 @@ ps_get_ts_arg(struct ps_build_ctx *ps, unsigned ta)
     }
     if (ta & D3DTA_COMPLEMENT) {
         struct ureg_dst dst = ureg_DECL_temporary(ps->ureg);
-        ureg_SUB(ps->ureg, dst, ureg_imm1f(ps->ureg, 1.0f), reg);
+        ureg_ADD(ps->ureg, dst, ureg_imm1f(ps->ureg, 1.0f), ureg_negate(reg));
         reg = ureg_src(dst);
     }
     if (ta & D3DTA_ALPHAREPLICATE)
@@ -1235,17 +1238,17 @@ ps_do_ts_op(struct ps_build_ctx *ps, unsigned top, struct ureg_dst dst, struct u
         break;
     case D3DTOP_ADDSIGNED:
         ureg_ADD(ureg, tmp, arg[1], arg[2]);
-        ureg_SUB(ureg, dst, ureg_src(tmp), ureg_imm1f(ureg, 0.5f));
+        ureg_ADD(ureg, dst, ureg_src(tmp), ureg_imm1f(ureg, -0.5f));
         break;
     case D3DTOP_ADDSIGNED2X:
         ureg_ADD(ureg, tmp, arg[1], arg[2]);
         ureg_MAD(ureg, dst, ureg_src(tmp), ureg_imm1f(ureg, 2.0f), ureg_imm1f(ureg, -1.0f));
         break;
     case D3DTOP_SUBTRACT:
-        ureg_SUB(ureg, dst, arg[1], arg[2]);
+        ureg_ADD(ureg, dst, arg[1], ureg_negate(arg[2]));
         break;
     case D3DTOP_ADDSMOOTH:
-        ureg_SUB(ureg, tmp, ureg_imm1f(ureg, 1.0f), arg[1]);
+        ureg_ADD(ureg, tmp, ureg_imm1f(ureg, 1.0f), ureg_negate(arg[1]));
         ureg_MAD(ureg, dst, ureg_src(tmp), arg[2], arg[1]);
         break;
     case D3DTOP_BLENDDIFFUSEALPHA:
@@ -1259,7 +1262,7 @@ ps_do_ts_op(struct ps_build_ctx *ps, unsigned top, struct ureg_dst dst, struct u
         ureg_LRP(ureg, dst, _WWWW(_CONST(20)), arg[1], arg[2]);
         break;
     case D3DTOP_BLENDTEXTUREALPHAPM:
-        ureg_SUB(ureg, tmp_x, ureg_imm1f(ureg, 1.0f), _W(ps->rTex));
+        ureg_ADD(ureg, tmp_x, ureg_imm1f(ureg, 1.0f), ureg_negate(_W(ps->rTex)));
         ureg_MAD(ureg, dst, arg[2], _X(tmp), arg[1]);
         break;
     case D3DTOP_BLENDCURRENTALPHA:
@@ -1276,11 +1279,11 @@ ps_do_ts_op(struct ps_build_ctx *ps, unsigned top, struct ureg_dst dst, struct u
         ureg_MAD(ureg, dst, arg[1], arg[2], _WWWW(arg[1]));
         break;
     case D3DTOP_MODULATEINVALPHA_ADDCOLOR:
-        ureg_SUB(ureg, tmp_x, ureg_imm1f(ureg, 1.0f), _WWWW(arg[1]));
+        ureg_ADD(ureg, tmp_x, ureg_imm1f(ureg, 1.0f), ureg_negate(_WWWW(arg[1])));
         ureg_MAD(ureg, dst, _X(tmp), arg[2], arg[1]);
         break;
     case D3DTOP_MODULATEINVCOLOR_ADDALPHA:
-        ureg_SUB(ureg, tmp, ureg_imm1f(ureg, 1.0f), arg[1]);
+        ureg_ADD(ureg, tmp, ureg_imm1f(ureg, 1.0f), ureg_negate(arg[1]));
         ureg_MAD(ureg, dst, ureg_src(tmp), arg[2], _WWWW(arg[1]));
         break;
     case D3DTOP_BUMPENVMAP:
@@ -1288,8 +1291,8 @@ ps_do_ts_op(struct ps_build_ctx *ps, unsigned top, struct ureg_dst dst, struct u
     case D3DTOP_BUMPENVMAPLUMINANCE:
         break;
     case D3DTOP_DOTPRODUCT3:
-        ureg_SUB(ureg, tmp, arg[1], ureg_imm4f(ureg,0.5,0.5,0.5,0.5));
-        ureg_SUB(ureg, tmp2, arg[2] , ureg_imm4f(ureg,0.5,0.5,0.5,0.5));
+        ureg_ADD(ureg, tmp, arg[1], ureg_imm4f(ureg,-0.5,-0.5,-0.5,-0.5));
+        ureg_ADD(ureg, tmp2, arg[2] , ureg_imm4f(ureg,-0.5,-0.5,-0.5,-0.5));
         ureg_DP3(ureg, tmp, ureg_src(tmp), ureg_src(tmp2));
         ureg_MUL(ureg, ureg_saturate(dst), ureg_src(tmp), ureg_imm4f(ureg,4.0,4.0,4.0,4.0));
         break;
@@ -1460,9 +1463,9 @@ nine_ff_build_ps(struct NineDevice9 *device, struct nine_ff_ps_key *key)
             ureg_MUL(ureg, ps.rMod, ps.rCurSrc, ps.rTexSrc);
         }
 
-        colorarg[0] = (key->ts[s].colorarg0 | ((key->colorarg_b4[0] >> s) << 4) | ((key->colorarg_b5[0] >> s) << 5)) & 0x3f;
-        colorarg[1] = (key->ts[s].colorarg1 | ((key->colorarg_b4[1] >> s) << 4) | ((key->colorarg_b5[1] >> s) << 5)) & 0x3f;
-        colorarg[2] = (key->ts[s].colorarg2 | ((key->colorarg_b4[2] >> s) << 4) | ((key->colorarg_b5[2] >> s) << 5)) & 0x3f;
+        colorarg[0] = (key->ts[s].colorarg0 | (((key->colorarg_b4[0] >> s) & 0x1) << 4) | ((key->colorarg_b5[0] >> s) << 5)) & 0x3f;
+        colorarg[1] = (key->ts[s].colorarg1 | (((key->colorarg_b4[1] >> s) & 0x1) << 4) | ((key->colorarg_b5[1] >> s) << 5)) & 0x3f;
+        colorarg[2] = (key->ts[s].colorarg2 | (((key->colorarg_b4[2] >> s) & 0x1) << 4) | ((key->colorarg_b5[2] >> s) << 5)) & 0x3f;
         alphaarg[0] = (key->ts[s].alphaarg0 | ((key->alphaarg_b4[0] >> s) << 4)) & 0x1f;
         alphaarg[1] = (key->ts[s].alphaarg1 | ((key->alphaarg_b4[1] >> s) << 4)) & 0x1f;
         alphaarg[2] = (key->ts[s].alphaarg2 | ((key->alphaarg_b4[2] >> s) << 4)) & 0x1f;
@@ -1536,7 +1539,7 @@ nine_ff_build_ps(struct NineDevice9 *device, struct nine_ff_ps_key *key)
             ureg_EX2(ureg, rFog, _X(rFog));
         } else
         if (key->fog_mode == D3DFOG_LINEAR) {
-            ureg_SUB(ureg, rFog, _XXXX(_CONST(22)), _X(rFog));
+            ureg_ADD(ureg, rFog, _XXXX(_CONST(22)), ureg_negate(_X(rFog)));
             ureg_MUL(ureg, ureg_saturate(rFog), _X(rFog), _YYYY(_CONST(22)));
         }
         ureg_LRP(ureg, ureg_writemask(oCol, TGSI_WRITEMASK_XYZ), _X(rFog), ps.rCurSrc, _CONST(21));
@@ -1552,13 +1555,12 @@ nine_ff_build_ps(struct NineDevice9 *device, struct nine_ff_ps_key *key)
 
     ureg_END(ureg);
     nine_ureg_tgsi_dump(ureg, FALSE);
-    return ureg_create_shader_and_destroy(ureg, device->pipe);
+    return ureg_create_shader_and_destroy(ureg, device->context.pipe);
 }
 
 static struct NineVertexShader9 *
 nine_ff_get_vs(struct NineDevice9 *device)
 {
-    const struct nine_state *state = &device->state;
     const struct nine_context *context = &device->context;
     struct NineVertexShader9 *vs;
     enum pipe_error err;
@@ -1619,8 +1621,8 @@ nine_ff_get_vs(struct NineDevice9 *device)
         key.passthrough = 0;
     key.pointscale = !!context->rs[D3DRS_POINTSCALEENABLE];
 
-    key.lighting = !!context->rs[D3DRS_LIGHTING] &&  state->ff.num_lights_active;
-    key.darkness = !!context->rs[D3DRS_LIGHTING] && !state->ff.num_lights_active;
+    key.lighting = !!context->rs[D3DRS_LIGHTING] &&  context->ff.num_lights_active;
+    key.darkness = !!context->rs[D3DRS_LIGHTING] && !context->ff.num_lights_active;
     if (key.position_t) {
         key.darkness = 0; /* |= key.lighting; */ /* XXX ? */
         key.lighting = 0;
@@ -1659,8 +1661,8 @@ nine_ff_get_vs(struct NineDevice9 *device)
     }
 
     for (s = 0; s < 8; ++s) {
-        unsigned gen = (state->ff.tex_stage[s][D3DTSS_TEXCOORDINDEX] >> 16) + 1;
-        unsigned idx = state->ff.tex_stage[s][D3DTSS_TEXCOORDINDEX] & 7;
+        unsigned gen = (context->ff.tex_stage[s][D3DTSS_TEXCOORDINDEX] >> 16) + 1;
+        unsigned idx = context->ff.tex_stage[s][D3DTSS_TEXCOORDINDEX] & 7;
         unsigned dim;
 
         if (key.position_t && gen > NINED3DTSS_TCI_PASSTHRU)
@@ -1673,7 +1675,7 @@ nine_ff_get_vs(struct NineDevice9 *device)
         key.tc_idx |= idx << (s * 3);
         key.tc_dim_input |= ((input_texture_coord[idx]-1) & 0x3) << (s * 2);
 
-        dim = state->ff.tex_stage[s][D3DTSS_TEXTURETRANSFORMFLAGS] & 0x7;
+        dim = context->ff.tex_stage[s][D3DTSS_TEXTURETRANSFORMFLAGS] & 0x7;
         if (dim > 4)
             dim = input_texture_coord[idx];
         if (dim == 1) /* NV behaviour */
@@ -1681,6 +1683,7 @@ nine_ff_get_vs(struct NineDevice9 *device)
         key.tc_dim_output |= dim << (s * 3);
     }
 
+    DBG("VS ff key hash: %x\n", nine_ff_vs_key_hash(&key));
     vs = util_hash_table_get(device->ff.ht_vs, &key);
     if (vs)
         return vs;
@@ -1696,7 +1699,6 @@ nine_ff_get_vs(struct NineDevice9 *device)
         (void)err;
         assert(err == PIPE_OK);
         device->ff.num_vs++;
-        NineUnknown_ConvertRefToBind(NineUnknown(vs));
 
         vs->num_inputs = bld.num_inputs;
         for (n = 0; n < bld.num_inputs; ++n)
@@ -1708,13 +1710,12 @@ nine_ff_get_vs(struct NineDevice9 *device)
     return vs;
 }
 
-#define GET_D3DTS(n) nine_state_access_transform(&state->ff, D3DTS_##n, FALSE)
+#define GET_D3DTS(n) nine_state_access_transform(&context->ff, D3DTS_##n, FALSE)
 #define IS_D3DTS_DIRTY(s,n) ((s)->ff.changed.transform[(D3DTS_##n) / 32] & (1 << ((D3DTS_##n) % 32)))
 
 static struct NinePixelShader9 *
 nine_ff_get_ps(struct NineDevice9 *device)
 {
-    struct nine_state *state = &device->state;
     struct nine_context *context = &device->context;
     D3DMATRIX *projection_matrix = GET_D3DTS(PROJECTION);
     struct NinePixelShader9 *ps;
@@ -1727,8 +1728,8 @@ nine_ff_get_ps(struct NineDevice9 *device)
 
     memset(&key, 0, sizeof(key));
     for (s = 0; s < 8; ++s) {
-        key.ts[s].colorop = state->ff.tex_stage[s][D3DTSS_COLOROP];
-        key.ts[s].alphaop = state->ff.tex_stage[s][D3DTSS_ALPHAOP];
+        key.ts[s].colorop = context->ff.tex_stage[s][D3DTSS_COLOROP];
+        key.ts[s].alphaop = context->ff.tex_stage[s][D3DTSS_ALPHAOP];
         const uint8_t used_c = ps_d3dtop_args_mask(key.ts[s].colorop);
         const uint8_t used_a = ps_d3dtop_args_mask(key.ts[s].alphaop);
         /* MSDN says D3DTOP_DISABLE disables this and all subsequent stages.
@@ -1739,12 +1740,12 @@ nine_ff_get_ps(struct NineDevice9 *device)
             break;
         }
 
-        if (!context->texture[s] &&
-            ((state->ff.tex_stage[s][D3DTSS_COLORARG0] == D3DTA_TEXTURE &&
+        if (!context->texture[s].enabled &&
+            ((context->ff.tex_stage[s][D3DTSS_COLORARG0] == D3DTA_TEXTURE &&
               used_c & 0x1) ||
-             (state->ff.tex_stage[s][D3DTSS_COLORARG1] == D3DTA_TEXTURE &&
+             (context->ff.tex_stage[s][D3DTSS_COLORARG1] == D3DTA_TEXTURE &&
               used_c & 0x2) ||
-             (state->ff.tex_stage[s][D3DTSS_COLORARG2] == D3DTA_TEXTURE &&
+             (context->ff.tex_stage[s][D3DTSS_COLORARG2] == D3DTA_TEXTURE &&
               used_c & 0x4))) {
             /* Tested on Windows: Invalid texture read disables the stage
              * and the subsequent ones, but only for colorop. For alpha,
@@ -1755,37 +1756,37 @@ nine_ff_get_ps(struct NineDevice9 *device)
             break;
         }
 
-        if (state->ff.tex_stage[s][D3DTSS_COLORARG0] == D3DTA_TEXTURE ||
-            state->ff.tex_stage[s][D3DTSS_COLORARG1] == D3DTA_TEXTURE ||
-            state->ff.tex_stage[s][D3DTSS_COLORARG2] == D3DTA_TEXTURE ||
-            state->ff.tex_stage[s][D3DTSS_ALPHAARG0] == D3DTA_TEXTURE ||
-            state->ff.tex_stage[s][D3DTSS_ALPHAARG1] == D3DTA_TEXTURE ||
-            state->ff.tex_stage[s][D3DTSS_ALPHAARG2] == D3DTA_TEXTURE)
+        if (context->ff.tex_stage[s][D3DTSS_COLORARG0] == D3DTA_TEXTURE ||
+            context->ff.tex_stage[s][D3DTSS_COLORARG1] == D3DTA_TEXTURE ||
+            context->ff.tex_stage[s][D3DTSS_COLORARG2] == D3DTA_TEXTURE ||
+            context->ff.tex_stage[s][D3DTSS_ALPHAARG0] == D3DTA_TEXTURE ||
+            context->ff.tex_stage[s][D3DTSS_ALPHAARG1] == D3DTA_TEXTURE ||
+            context->ff.tex_stage[s][D3DTSS_ALPHAARG2] == D3DTA_TEXTURE)
             sampler_mask |= (1 << s);
 
         if (key.ts[s].colorop != D3DTOP_DISABLE) {
-            if (used_c & 0x1) key.ts[s].colorarg0 = state->ff.tex_stage[s][D3DTSS_COLORARG0];
-            if (used_c & 0x2) key.ts[s].colorarg1 = state->ff.tex_stage[s][D3DTSS_COLORARG1];
-            if (used_c & 0x4) key.ts[s].colorarg2 = state->ff.tex_stage[s][D3DTSS_COLORARG2];
-            if (used_c & 0x1) key.colorarg_b4[0] |= (state->ff.tex_stage[s][D3DTSS_COLORARG0] >> 4) << s;
-            if (used_c & 0x1) key.colorarg_b5[0] |= (state->ff.tex_stage[s][D3DTSS_COLORARG0] >> 5) << s;
-            if (used_c & 0x2) key.colorarg_b4[1] |= (state->ff.tex_stage[s][D3DTSS_COLORARG1] >> 4) << s;
-            if (used_c & 0x2) key.colorarg_b5[1] |= (state->ff.tex_stage[s][D3DTSS_COLORARG1] >> 5) << s;
-            if (used_c & 0x4) key.colorarg_b4[2] |= (state->ff.tex_stage[s][D3DTSS_COLORARG2] >> 4) << s;
-            if (used_c & 0x4) key.colorarg_b5[2] |= (state->ff.tex_stage[s][D3DTSS_COLORARG2] >> 5) << s;
+            if (used_c & 0x1) key.ts[s].colorarg0 = context->ff.tex_stage[s][D3DTSS_COLORARG0] & 0x7;
+            if (used_c & 0x2) key.ts[s].colorarg1 = context->ff.tex_stage[s][D3DTSS_COLORARG1] & 0x7;
+            if (used_c & 0x4) key.ts[s].colorarg2 = context->ff.tex_stage[s][D3DTSS_COLORARG2] & 0x7;
+            if (used_c & 0x1) key.colorarg_b4[0] |= ((context->ff.tex_stage[s][D3DTSS_COLORARG0] >> 4) & 0x1) << s;
+            if (used_c & 0x1) key.colorarg_b5[0] |= ((context->ff.tex_stage[s][D3DTSS_COLORARG0] >> 5) & 0x1) << s;
+            if (used_c & 0x2) key.colorarg_b4[1] |= ((context->ff.tex_stage[s][D3DTSS_COLORARG1] >> 4) & 0x1) << s;
+            if (used_c & 0x2) key.colorarg_b5[1] |= ((context->ff.tex_stage[s][D3DTSS_COLORARG1] >> 5) & 0x1) << s;
+            if (used_c & 0x4) key.colorarg_b4[2] |= ((context->ff.tex_stage[s][D3DTSS_COLORARG2] >> 4) & 0x1) << s;
+            if (used_c & 0x4) key.colorarg_b5[2] |= ((context->ff.tex_stage[s][D3DTSS_COLORARG2] >> 5) & 0x1) << s;
         }
         if (key.ts[s].alphaop != D3DTOP_DISABLE) {
-            if (used_a & 0x1) key.ts[s].alphaarg0 = state->ff.tex_stage[s][D3DTSS_ALPHAARG0];
-            if (used_a & 0x2) key.ts[s].alphaarg1 = state->ff.tex_stage[s][D3DTSS_ALPHAARG1];
-            if (used_a & 0x4) key.ts[s].alphaarg2 = state->ff.tex_stage[s][D3DTSS_ALPHAARG2];
-            if (used_a & 0x1) key.alphaarg_b4[0] |= (state->ff.tex_stage[s][D3DTSS_ALPHAARG0] >> 4) << s;
-            if (used_a & 0x2) key.alphaarg_b4[1] |= (state->ff.tex_stage[s][D3DTSS_ALPHAARG1] >> 4) << s;
-            if (used_a & 0x4) key.alphaarg_b4[2] |= (state->ff.tex_stage[s][D3DTSS_ALPHAARG2] >> 4) << s;
+            if (used_a & 0x1) key.ts[s].alphaarg0 = context->ff.tex_stage[s][D3DTSS_ALPHAARG0] & 0x7;
+            if (used_a & 0x2) key.ts[s].alphaarg1 = context->ff.tex_stage[s][D3DTSS_ALPHAARG1] & 0x7;
+            if (used_a & 0x4) key.ts[s].alphaarg2 = context->ff.tex_stage[s][D3DTSS_ALPHAARG2] & 0x7;
+            if (used_a & 0x1) key.alphaarg_b4[0] |= ((context->ff.tex_stage[s][D3DTSS_ALPHAARG0] >> 4) & 0x1) << s;
+            if (used_a & 0x2) key.alphaarg_b4[1] |= ((context->ff.tex_stage[s][D3DTSS_ALPHAARG1] >> 4) & 0x1) << s;
+            if (used_a & 0x4) key.alphaarg_b4[2] |= ((context->ff.tex_stage[s][D3DTSS_ALPHAARG2] >> 4) & 0x1) << s;
         }
-        key.ts[s].resultarg = state->ff.tex_stage[s][D3DTSS_RESULTARG] == D3DTA_TEMP;
+        key.ts[s].resultarg = context->ff.tex_stage[s][D3DTSS_RESULTARG] == D3DTA_TEMP;
 
-        if (context->texture[s]) {
-            switch (context->texture[s]->base.type) {
+        if (context->texture[s].enabled) {
+            switch (context->texture[s].type) {
             case D3DRTYPE_TEXTURE:       key.ts[s].textarget = 1; break;
             case D3DRTYPE_VOLUMETEXTURE: key.ts[s].textarget = 2; break;
             case D3DRTYPE_CUBETEXTURE:   key.ts[s].textarget = 3; break;
@@ -1818,7 +1819,7 @@ nine_ff_get_ps(struct NineDevice9 *device)
     if (s >= 1)
         key.ts[s-1].resultarg = 0;
 
-    key.projected = nine_ff_get_projected_key(state, context);
+    key.projected = nine_ff_get_projected_key(context);
     key.specular = !!context->rs[D3DRS_SPECULARENABLE];
 
     for (; s < 8; ++s)
@@ -1836,6 +1837,7 @@ nine_ff_get_ps(struct NineDevice9 *device)
             !(projection_matrix->_34 == 0.0f &&
               projection_matrix->_44 == 1.0f);
 
+    DBG("PS ff key hash: %x\n", nine_ff_ps_key_hash(&key));
     ps = util_hash_table_get(device->ff.ht_ps, &key);
     if (ps)
         return ps;
@@ -1849,7 +1851,6 @@ nine_ff_get_ps(struct NineDevice9 *device)
         (void)err;
         assert(err == PIPE_OK);
         device->ff.num_ps++;
-        NineUnknown_ConvertRefToBind(NineUnknown(ps));
 
         ps->rt_mask = 0x1;
         ps->sampler_mask = sampler_mask;
@@ -1860,7 +1861,6 @@ nine_ff_get_ps(struct NineDevice9 *device)
 static void
 nine_ff_load_vs_transforms(struct NineDevice9 *device)
 {
-    struct nine_state *state = &device->state;
     struct nine_context *context = &device->context;
     D3DMATRIX T;
     D3DMATRIX *M = (D3DMATRIX *)device->ff.vs_const;
@@ -1869,9 +1869,9 @@ nine_ff_load_vs_transforms(struct NineDevice9 *device)
     /* TODO: make this nicer, and only upload the ones we need */
     /* TODO: use ff.vs_const as storage of W, V, P matrices */
 
-    if (IS_D3DTS_DIRTY(state, WORLD) ||
-        IS_D3DTS_DIRTY(state, VIEW) ||
-        IS_D3DTS_DIRTY(state, PROJECTION)) {
+    if (IS_D3DTS_DIRTY(context, WORLD) ||
+        IS_D3DTS_DIRTY(context, VIEW) ||
+        IS_D3DTS_DIRTY(context, PROJECTION)) {
         /* WVP, WV matrices */
         nine_d3d_matrix_matrix_mul(&M[1], GET_D3DTS(WORLD), GET_D3DTS(VIEW));
         nine_d3d_matrix_matrix_mul(&M[0], &M[1], GET_D3DTS(PROJECTION));
@@ -1901,13 +1901,12 @@ nine_ff_load_vs_transforms(struct NineDevice9 *device)
 static void
 nine_ff_load_lights(struct NineDevice9 *device)
 {
-    struct nine_state *state = &device->state;
     struct nine_context *context = &device->context;
     struct fvec4 *dst = (struct fvec4 *)device->ff.vs_const;
     unsigned l;
 
-    if (state->changed.group & NINE_STATE_FF_MATERIAL) {
-        const D3DMATERIAL9 *mtl = &state->ff.material;
+    if (context->changed.group & NINE_STATE_FF_MATERIAL) {
+        const D3DMATERIAL9 *mtl = &context->ff.material;
 
         memcpy(&dst[20], &mtl->Diffuse, 4 * sizeof(float));
         memcpy(&dst[21], &mtl->Ambient, 4 * sizeof(float));
@@ -1920,11 +1919,11 @@ nine_ff_load_lights(struct NineDevice9 *device)
         dst[19].z = dst[25].z * mtl->Ambient.b + mtl->Emissive.b;
     }
 
-    if (!(state->changed.group & NINE_STATE_FF_LIGHTING))
+    if (!(context->changed.group & NINE_STATE_FF_LIGHTING))
         return;
 
-    for (l = 0; l < state->ff.num_lights_active; ++l) {
-        const D3DLIGHT9 *light = &state->ff.light[state->ff.active_light[l]];
+    for (l = 0; l < context->ff.num_lights_active; ++l) {
+        const D3DLIGHT9 *light = &context->ff.light[context->ff.active_light[l]];
 
         dst[32 + l * 8].x = light->Type;
         dst[32 + l * 8].y = light->Attenuation0;
@@ -1940,18 +1939,17 @@ nine_ff_load_lights(struct NineDevice9 *device)
         dst[38 + l * 8].x = cosf(light->Theta * 0.5f);
         dst[38 + l * 8].y = cosf(light->Phi * 0.5f);
         dst[38 + l * 8].z = 1.0f / (dst[38 + l * 8].x - dst[38 + l * 8].y);
-        dst[39 + l * 8].w = (l + 1) == state->ff.num_lights_active;
+        dst[39 + l * 8].w = (float)((l + 1) == context->ff.num_lights_active);
     }
 }
 
 static void
 nine_ff_load_point_and_fog_params(struct NineDevice9 *device)
 {
-    const struct nine_state *state = &device->state;
     struct nine_context *context = &device->context;
     struct fvec4 *dst = (struct fvec4 *)device->ff.vs_const;
 
-    if (!(state->changed.group & NINE_STATE_FF_OTHER))
+    if (!(context->changed.group & NINE_STATE_FF_VS_OTHER))
         return;
     dst[26].x = asfloat(context->rs[D3DRS_POINTSIZE_MIN]);
     dst[26].y = asfloat(context->rs[D3DRS_POINTSIZE_MAX]);
@@ -1969,43 +1967,42 @@ nine_ff_load_point_and_fog_params(struct NineDevice9 *device)
 static void
 nine_ff_load_tex_matrices(struct NineDevice9 *device)
 {
-    struct nine_state *state = &device->state;
+    struct nine_context *context = &device->context;
     D3DMATRIX *M = (D3DMATRIX *)device->ff.vs_const;
     unsigned s;
 
-    if (!(state->ff.changed.transform[0] & 0xff0000))
+    if (!(context->ff.changed.transform[0] & 0xff0000))
         return;
     for (s = 0; s < 8; ++s) {
-        if (IS_D3DTS_DIRTY(state, TEXTURE0 + s))
-            nine_d3d_matrix_transpose(&M[32 + s], nine_state_access_transform(&state->ff, D3DTS_TEXTURE0 + s, FALSE));
+        if (IS_D3DTS_DIRTY(context, TEXTURE0 + s))
+            nine_d3d_matrix_transpose(&M[32 + s], nine_state_access_transform(&context->ff, D3DTS_TEXTURE0 + s, FALSE));
     }
 }
 
 static void
 nine_ff_load_ps_params(struct NineDevice9 *device)
 {
-    const struct nine_state *state = &device->state;
     struct nine_context *context = &device->context;
     struct fvec4 *dst = (struct fvec4 *)device->ff.ps_const;
     unsigned s;
 
-    if (!(state->changed.group & (NINE_STATE_FF_PSSTAGES | NINE_STATE_FF_OTHER)))
+    if (!(context->changed.group & NINE_STATE_FF_PS_CONSTS))
         return;
 
     for (s = 0; s < 8; ++s)
-        d3dcolor_to_rgba(&dst[s].x, state->ff.tex_stage[s][D3DTSS_CONSTANT]);
+        d3dcolor_to_rgba(&dst[s].x, context->ff.tex_stage[s][D3DTSS_CONSTANT]);
 
     for (s = 0; s < 8; ++s) {
-        dst[8 + s].x = asfloat(state->ff.tex_stage[s][D3DTSS_BUMPENVMAT00]);
-        dst[8 + s].y = asfloat(state->ff.tex_stage[s][D3DTSS_BUMPENVMAT01]);
-        dst[8 + s].z = asfloat(state->ff.tex_stage[s][D3DTSS_BUMPENVMAT10]);
-        dst[8 + s].w = asfloat(state->ff.tex_stage[s][D3DTSS_BUMPENVMAT11]);
+        dst[8 + s].x = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVMAT00]);
+        dst[8 + s].y = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVMAT01]);
+        dst[8 + s].z = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVMAT10]);
+        dst[8 + s].w = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVMAT11]);
         if (s & 1) {
-            dst[16 + s / 2].z = asfloat(state->ff.tex_stage[s][D3DTSS_BUMPENVLSCALE]);
-            dst[16 + s / 2].w = asfloat(state->ff.tex_stage[s][D3DTSS_BUMPENVLOFFSET]);
+            dst[16 + s / 2].z = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVLSCALE]);
+            dst[16 + s / 2].w = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVLOFFSET]);
         } else {
-            dst[16 + s / 2].x = asfloat(state->ff.tex_stage[s][D3DTSS_BUMPENVLSCALE]);
-            dst[16 + s / 2].y = asfloat(state->ff.tex_stage[s][D3DTSS_BUMPENVLOFFSET]);
+            dst[16 + s / 2].x = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVLSCALE]);
+            dst[16 + s / 2].y = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVLOFFSET]);
         }
     }
 
@@ -2038,20 +2035,19 @@ nine_ff_load_viewport_info(struct NineDevice9 *device)
 void
 nine_ff_update(struct NineDevice9 *device)
 {
-    struct nine_state *state = &device->state;
     struct nine_context *context = &device->context;
     struct pipe_constant_buffer cb;
 
-    DBG("vs=%p ps=%p\n", context->vs, device->state.ps);
+    DBG("vs=%p ps=%p\n", context->vs, context->ps);
 
     /* NOTE: the only reference belongs to the hash table */
     if (!context->programmable_vs) {
         device->ff.vs = nine_ff_get_vs(device);
-        device->state.changed.group |= NINE_STATE_VS;
+        context->changed.group |= NINE_STATE_VS;
     }
-    if (!device->state.ps) {
+    if (!context->ps) {
         device->ff.ps = nine_ff_get_ps(device);
-        device->state.changed.group |= NINE_STATE_PS;
+        context->changed.group |= NINE_STATE_PS;
     }
 
     if (!context->programmable_vs) {
@@ -2061,30 +2057,20 @@ nine_ff_update(struct NineDevice9 *device)
         nine_ff_load_point_and_fog_params(device);
         nine_ff_load_viewport_info(device);
 
-        memset(state->ff.changed.transform, 0, sizeof(state->ff.changed.transform));
+        memset(context->ff.changed.transform, 0, sizeof(context->ff.changed.transform));
 
         cb.buffer_offset = 0;
         cb.buffer = NULL;
         cb.user_buffer = device->ff.vs_const;
         cb.buffer_size = NINE_FF_NUM_VS_CONST * 4 * sizeof(float);
 
-        if (!device->driver_caps.user_cbufs) {
-            context->pipe.cb_vs_ff.buffer_size = cb.buffer_size;
-            u_upload_data(device->constbuf_uploader,
-                          0,
-                          cb.buffer_size,
-                          device->constbuf_alignment,
-                          cb.user_buffer,
-                          &context->pipe.cb_vs_ff.buffer_offset,
-                          &context->pipe.cb_vs_ff.buffer);
-            u_upload_unmap(device->constbuf_uploader);
-            context->pipe.cb_vs_ff.user_buffer = NULL;
-        } else
-            context->pipe.cb_vs_ff = cb;
+        context->pipe_data.cb_vs_ff = cb;
         context->commit |= NINE_STATE_COMMIT_CONST_VS;
+
+        context->changed.group &= ~NINE_STATE_FF_VS;
     }
 
-    if (!device->state.ps) {
+    if (!context->ps) {
         nine_ff_load_ps_params(device);
 
         cb.buffer_offset = 0;
@@ -2092,23 +2078,11 @@ nine_ff_update(struct NineDevice9 *device)
         cb.user_buffer = device->ff.ps_const;
         cb.buffer_size = NINE_FF_NUM_PS_CONST * 4 * sizeof(float);
 
-        if (!device->driver_caps.user_cbufs) {
-            context->pipe.cb_ps_ff.buffer_size = cb.buffer_size;
-            u_upload_data(device->constbuf_uploader,
-                          0,
-                          cb.buffer_size,
-                          device->constbuf_alignment,
-                          cb.user_buffer,
-                          &context->pipe.cb_ps_ff.buffer_offset,
-                          &context->pipe.cb_ps_ff.buffer);
-            u_upload_unmap(device->constbuf_uploader);
-            context->pipe.cb_ps_ff.user_buffer = NULL;
-        } else
-            context->pipe.cb_ps_ff = cb;
+        context->pipe_data.cb_ps_ff = cb;
         context->commit |= NINE_STATE_COMMIT_CONST_PS;
-    }
 
-    device->state.changed.group &= ~NINE_STATE_FF;
+        context->changed.group &= ~NINE_STATE_FF_PS;
+    }
 }
 
 
@@ -2162,25 +2136,29 @@ nine_ff_fini(struct NineDevice9 *device)
 static void
 nine_ff_prune_vs(struct NineDevice9 *device)
 {
+    struct nine_context *context = &device->context;
+
     if (device->ff.num_vs > 100) {
         /* could destroy the bound one here, so unbind */
-        device->pipe->bind_vs_state(device->pipe, NULL);
+        context->pipe->bind_vs_state(context->pipe, NULL);
         util_hash_table_foreach(device->ff.ht_vs, nine_ff_ht_delete_cb, NULL);
         util_hash_table_clear(device->ff.ht_vs);
         device->ff.num_vs = 0;
-        device->state.changed.group |= NINE_STATE_VS;
+        context->changed.group |= NINE_STATE_VS;
     }
 }
 static void
 nine_ff_prune_ps(struct NineDevice9 *device)
 {
+    struct nine_context *context = &device->context;
+
     if (device->ff.num_ps > 100) {
         /* could destroy the bound one here, so unbind */
-        device->pipe->bind_fs_state(device->pipe, NULL);
+        context->pipe->bind_fs_state(context->pipe, NULL);
         util_hash_table_foreach(device->ff.ht_ps, nine_ff_ht_delete_cb, NULL);
         util_hash_table_clear(device->ff.ht_ps);
         device->ff.num_ps = 0;
-        device->state.changed.group |= NINE_STATE_PS;
+        context->changed.group |= NINE_STATE_PS;
     }
 }
 
@@ -2502,7 +2480,7 @@ nine_d3d_matrix_inverse(D3DMATRIX *D, const D3DMATRIX *M)
         M->m[2][0] * D->m[0][2] +
         M->m[3][0] * D->m[0][3];
 
-    if (det < 1e-30) {/* non inversible */
+    if (fabsf(det) < 1e-30) {/* non inversible */
         *D = *M; /* wine tests */
         return;
     }