From 5d2a8e8a367b397466d191f47e766ee3540a6ea6 Mon Sep 17 00:00:00 2001 From: Axel Davy Date: Fri, 23 Sep 2016 23:14:36 +0200 Subject: [PATCH] st/nine: Cleaning code for vs temporaries This has been a real mess up to now: the temporaries were allocated once and then shared between the different parts of the code. To make the code easier to maintain, the temporaries are now allocated and released as needed. Surprisingly, this patch, which was supposed to introduce no behaviour change, actually fixed a visual bug observed in a sample program. The bug was caused by ureg_normalize3 polluting a temporary variable. Signed-off-by: Axel Davy --- src/gallium/state_trackers/nine/nine_ff.c | 233 +++++++++++++--------- 1 file changed, 135 insertions(+), 98 deletions(-) diff --git a/src/gallium/state_trackers/nine/nine_ff.c b/src/gallium/state_trackers/nine/nine_ff.c index 56876f00255..7b3c67c30a3 100644 --- a/src/gallium/state_trackers/nine/nine_ff.c +++ b/src/gallium/state_trackers/nine/nine_ff.c @@ -316,14 +316,15 @@ build_vs_add_input(struct vs_build_ctx *vs, uint16_t ndecl) /* NOTE: dst may alias src */ static inline void ureg_normalize3(struct ureg_program *ureg, - struct ureg_dst dst, struct ureg_src src, - struct ureg_dst tmp) + struct ureg_dst dst, struct ureg_src src) { + struct ureg_dst tmp = ureg_DECL_temporary(ureg); struct ureg_dst tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X); ureg_DP3(ureg, tmp_x, src, src); ureg_RSQ(ureg, tmp_x, _X(tmp)); ureg_MUL(ureg, dst, src, _X(tmp)); + ureg_release_temporary(ureg, tmp); } static void * @@ -332,15 +333,11 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs) const struct nine_ff_vs_key *key = vs->key; struct ureg_program *ureg = ureg_create(PIPE_SHADER_VERTEX); struct ureg_dst oPos, oCol[2], oPsz, oFog; - struct ureg_dst rVtx, rNrm; - struct ureg_dst r[8]; struct ureg_dst AR; - struct ureg_dst tmp, tmp_x, tmp_y, tmp_z; unsigned i, c; unsigned label[32], l = 0; - unsigned num_r = 8; - boolean
need_rNrm = key->lighting || key->passthrough & (1 << NINE_DECLUSAGE_NORMAL); - boolean need_rVtx = key->lighting || key->fog_mode || key->pointscale; + boolean need_aNrm = key->lighting || key->passthrough & (1 << NINE_DECLUSAGE_NORMAL); + boolean need_aVtx = key->lighting || key->fog_mode || key->pointscale; const unsigned texcoord_sn = get_texcoord_sn(device->screen); vs->ureg = ureg; @@ -349,13 +346,13 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs) for (i = 0; i < 8 * 3; i += 3) { switch ((key->tc_gen >> i) & 0x3) { case NINED3DTSS_TCI_CAMERASPACENORMAL: - need_rNrm = TRUE; + need_aNrm = TRUE; break; case NINED3DTSS_TCI_CAMERASPACEPOSITION: - need_rVtx = TRUE; + need_aVtx = TRUE; break; case NINED3DTSS_TCI_CAMERASPACEREFLECTIONVECTOR: - need_rVtx = need_rNrm = TRUE; + need_aVtx = need_aNrm = TRUE; break; default: break; @@ -368,7 +365,7 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs) vs->aVtx = build_vs_add_input(vs, key->position_t ? 
NINE_DECLUSAGE_POSITIONT : NINE_DECLUSAGE_POSITION); - if (need_rNrm) + if (need_aNrm) vs->aNrm = build_vs_add_input(vs, NINE_DECLUSAGE_NORMAL); vs->aCol[0] = ureg_imm1f(ureg, 1.0f); @@ -427,32 +424,27 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs) oPsz = ureg_writemask(oPsz, TGSI_WRITEMASK_X); } - /* Declare TEMPs: - */ - for (i = 0; i < num_r; ++i) - r[i] = ureg_DECL_temporary(ureg); - tmp = r[0]; - tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X); - tmp_y = ureg_writemask(tmp, TGSI_WRITEMASK_Y); - tmp_z = ureg_writemask(tmp, TGSI_WRITEMASK_Z); if (key->lighting || key->vertexblend) AR = ureg_DECL_address(ureg); - rVtx = ureg_writemask(r[1], TGSI_WRITEMASK_XYZ); - rNrm = ureg_writemask(r[2], TGSI_WRITEMASK_XYZ); - /* === Vertex transformation / vertex blending: */ if (key->vertextween) { + struct ureg_dst aVtx_dst = ureg_DECL_temporary(ureg); assert(!key->vertexblend); - ureg_LRP(ureg, r[2], _XXXX(_CONST(30)), vs->aVtx, vs->aVtx1); - if (need_rNrm) - ureg_LRP(ureg, r[3], _XXXX(_CONST(30)), vs->aNrm, vs->aNrm1); - vs->aVtx = ureg_src(r[2]); - vs->aNrm = ureg_src(r[3]); + ureg_LRP(ureg, aVtx_dst, _XXXX(_CONST(30)), vs->aVtx, vs->aVtx1); + vs->aVtx = ureg_src(aVtx_dst); + if (need_aNrm) { + struct ureg_dst aNrm_dst = ureg_DECL_temporary(ureg); + ureg_LRP(ureg, aNrm_dst, _XXXX(_CONST(30)), vs->aNrm, vs->aNrm1); + vs->aNrm = ureg_src(aNrm_dst); + } } if (key->vertexblend) { + struct ureg_dst tmp = ureg_DECL_temporary(ureg); + struct ureg_dst aVtx_dst = ureg_DECL_temporary(ureg); + struct ureg_dst sum_blendweights = ureg_DECL_temporary(ureg); struct ureg_src cWM[4]; for (i = 224; i <= 255; ++i) @@ -464,8 +456,8 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs) ureg_ARL(ureg, AR, ureg_src(tmp)); } - ureg_MOV(ureg, r[2], ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 0.0f)); - ureg_MOV(ureg, r[3], ureg_imm4f(ureg, 1.0f, 1.0f, 1.0f, 1.0f)); + ureg_MOV(ureg, aVtx_dst, ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 0.0f)); + ureg_MOV(ureg, 
sum_blendweights, ureg_imm4f(ureg, 1.0f, 1.0f, 1.0f, 1.0f)); for (i = 0; i < key->vertexblend; ++i) { for (c = 0; c < 4; ++c) { @@ -481,27 +473,33 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs) if (i < (key->vertexblend - 1)) { /* accumulate weighted position value */ - ureg_MAD(ureg, r[2], ureg_src(tmp), ureg_scalar(vs->aWgt, i), ureg_src(r[2])); + ureg_MAD(ureg, aVtx_dst, ureg_src(tmp), ureg_scalar(vs->aWgt, i), ureg_src(aVtx_dst)); /* subtract weighted position value for last value */ - ureg_SUB(ureg, r[3], ureg_src(r[3]), ureg_scalar(vs->aWgt, i)); + ureg_SUB(ureg, sum_blendweights, ureg_src(sum_blendweights), ureg_scalar(vs->aWgt, i)); } } /* the last weighted position is always 1 - sum_of_previous_weights */ - ureg_MAD(ureg, r[2], ureg_src(tmp), ureg_scalar(ureg_src(r[3]), key->vertexblend - 1), ureg_src(r[2])); + ureg_MAD(ureg, aVtx_dst, ureg_src(tmp), ureg_scalar(ureg_src(sum_blendweights), key->vertexblend - 1), ureg_src(aVtx_dst)); /* multiply by VIEW_PROJ */ - ureg_MUL(ureg, tmp, _X(r[2]), _CONST(8)); - ureg_MAD(ureg, tmp, _Y(r[2]), _CONST(9), ureg_src(tmp)); - ureg_MAD(ureg, tmp, _Z(r[2]), _CONST(10), ureg_src(tmp)); - ureg_MAD(ureg, oPos, _W(r[2]), _CONST(11), ureg_src(tmp)); - - if (need_rVtx) - vs->aVtx = ureg_src(r[2]); + ureg_MUL(ureg, tmp, _X(aVtx_dst), _CONST(8)); + ureg_MAD(ureg, tmp, _Y(aVtx_dst), _CONST(9), ureg_src(tmp)); + ureg_MAD(ureg, tmp, _Z(aVtx_dst), _CONST(10), ureg_src(tmp)); + ureg_MAD(ureg, oPos, _W(aVtx_dst), _CONST(11), ureg_src(tmp)); + + if (need_aVtx) + vs->aVtx = ureg_src(aVtx_dst); + + ureg_release_temporary(ureg, tmp); + ureg_release_temporary(ureg, sum_blendweights); + if (!need_aVtx) + ureg_release_temporary(ureg, aVtx_dst); } else if (key->position_t && device->driver_caps.window_space_position_support) { ureg_MOV(ureg, oPos, vs->aVtx); } else if (key->position_t) { + struct ureg_dst tmp = ureg_DECL_temporary(ureg); /* vs->aVtx contains the coordinates buffer wise. 
* later in the pipeline, clipping, viewport and division * by w (rhw = 1/w) are going to be applied, so do the reverse @@ -519,60 +517,74 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs) /* multiply X, Y, Z by w */ ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(tmp), _W(tmp)); ureg_MOV(ureg, oPos, ureg_src(tmp)); + ureg_release_temporary(ureg, tmp); } else { + struct ureg_dst tmp = ureg_DECL_temporary(ureg); /* position = vertex * WORLD_VIEW_PROJ */ ureg_MUL(ureg, tmp, _XXXX(vs->aVtx), _CONST(0)); ureg_MAD(ureg, tmp, _YYYY(vs->aVtx), _CONST(1), ureg_src(tmp)); ureg_MAD(ureg, tmp, _ZZZZ(vs->aVtx), _CONST(2), ureg_src(tmp)); ureg_MAD(ureg, oPos, _WWWW(vs->aVtx), _CONST(3), ureg_src(tmp)); + ureg_release_temporary(ureg, tmp); } - if (need_rVtx) { - ureg_MUL(ureg, rVtx, _XXXX(vs->aVtx), _CONST(4)); - ureg_MAD(ureg, rVtx, _YYYY(vs->aVtx), _CONST(5), ureg_src(rVtx)); - ureg_MAD(ureg, rVtx, _ZZZZ(vs->aVtx), _CONST(6), ureg_src(rVtx)); - ureg_MAD(ureg, rVtx, _WWWW(vs->aVtx), _CONST(7), ureg_src(rVtx)); + if (need_aVtx) { + struct ureg_dst aVtx_dst = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZ); + ureg_MUL(ureg, aVtx_dst, _XXXX(vs->aVtx), _CONST(4)); + ureg_MAD(ureg, aVtx_dst, _YYYY(vs->aVtx), _CONST(5), ureg_src(aVtx_dst)); + ureg_MAD(ureg, aVtx_dst, _ZZZZ(vs->aVtx), _CONST(6), ureg_src(aVtx_dst)); + ureg_MAD(ureg, aVtx_dst, _WWWW(vs->aVtx), _CONST(7), ureg_src(aVtx_dst)); + vs->aVtx = ureg_src(aVtx_dst); } - if (need_rNrm) { - ureg_MUL(ureg, rNrm, _XXXX(vs->aNrm), _CONST(16)); - ureg_MAD(ureg, rNrm, _YYYY(vs->aNrm), _CONST(17), ureg_src(rNrm)); - ureg_MAD(ureg, rNrm, _ZZZZ(vs->aNrm), _CONST(18), ureg_src(rNrm)); + if (need_aNrm) { + struct ureg_dst aNrm_dst = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZ); + ureg_MUL(ureg, aNrm_dst, _XXXX(vs->aNrm), _CONST(16)); + ureg_MAD(ureg, aNrm_dst, _YYYY(vs->aNrm), _CONST(17), ureg_src(aNrm_dst)); + ureg_MAD(ureg, aNrm_dst, _ZZZZ(vs->aNrm), _CONST(18), 
ureg_src(aNrm_dst)); if (key->normalizenormals) - ureg_normalize3(ureg, rNrm, ureg_src(rNrm), tmp); + ureg_normalize3(ureg, aNrm_dst, ureg_src(aNrm_dst)); + vs->aNrm = ureg_src(aNrm_dst); } - /* NOTE: don't use vs->aVtx, vs->aNrm after this line */ /* === Process point size: */ - if (key->vertexpointsize) { - struct ureg_src cPsz1 = ureg_DECL_constant(ureg, 26); - ureg_MAX(ureg, tmp_z, _XXXX(vs->aPsz), _XXXX(cPsz1)); - ureg_MIN(ureg, tmp_z, _Z(tmp), _YYYY(cPsz1)); - } else if (key->pointscale) { - struct ureg_src cPsz1 = ureg_DECL_constant(ureg, 26); - ureg_MOV(ureg, tmp_z, _ZZZZ(cPsz1)); - } + if (key->vertexpointsize || key->pointscale) { + struct ureg_dst tmp = ureg_DECL_temporary(ureg); + struct ureg_dst tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X); + struct ureg_dst tmp_y = ureg_writemask(tmp, TGSI_WRITEMASK_Y); + struct ureg_dst tmp_z = ureg_writemask(tmp, TGSI_WRITEMASK_Z); + if (key->vertexpointsize) { + struct ureg_src cPsz1 = ureg_DECL_constant(ureg, 26); + ureg_MAX(ureg, tmp_z, _XXXX(vs->aPsz), _XXXX(cPsz1)); + ureg_MIN(ureg, tmp_z, _Z(tmp), _YYYY(cPsz1)); + } else { + struct ureg_src cPsz1 = ureg_DECL_constant(ureg, 26); + ureg_MOV(ureg, tmp_z, _ZZZZ(cPsz1)); + } + + if (key->pointscale) { + struct ureg_src cPsz1 = ureg_DECL_constant(ureg, 26); + struct ureg_src cPsz2 = ureg_DECL_constant(ureg, 27); + + ureg_DP3(ureg, tmp_x, vs->aVtx, vs->aVtx); + ureg_RSQ(ureg, tmp_y, _X(tmp)); + ureg_MUL(ureg, tmp_y, _Y(tmp), _X(tmp)); + ureg_CMP(ureg, tmp_y, ureg_negate(_Y(tmp)), _Y(tmp), ureg_imm1f(ureg, 0.0f)); + ureg_MAD(ureg, tmp_x, _Y(tmp), _YYYY(cPsz2), _XXXX(cPsz2)); + ureg_MAD(ureg, tmp_x, _Y(tmp), _X(tmp), _WWWW(cPsz1)); + ureg_RSQ(ureg, tmp_x, _X(tmp)); + ureg_MUL(ureg, tmp_x, _X(tmp), _Z(tmp)); + ureg_MUL(ureg, tmp_x, _X(tmp), _WWWW(_CONST(100))); + ureg_MAX(ureg, tmp_x, _X(tmp), _XXXX(cPsz1)); + ureg_MIN(ureg, tmp_z, _X(tmp), _YYYY(cPsz1)); + } - if (key->pointscale) { - struct ureg_src cPsz1 = ureg_DECL_constant(ureg, 26); - struct ureg_src cPsz2 = 
ureg_DECL_constant(ureg, 27); - - ureg_DP3(ureg, tmp_x, ureg_src(r[1]), ureg_src(r[1])); - ureg_RSQ(ureg, tmp_y, _X(tmp)); - ureg_MUL(ureg, tmp_y, _Y(tmp), _X(tmp)); - ureg_CMP(ureg, tmp_y, ureg_negate(_Y(tmp)), _Y(tmp), ureg_imm1f(ureg, 0.0f)); - ureg_MAD(ureg, tmp_x, _Y(tmp), _YYYY(cPsz2), _XXXX(cPsz2)); - ureg_MAD(ureg, tmp_x, _Y(tmp), _X(tmp), _WWWW(cPsz1)); - ureg_RSQ(ureg, tmp_x, _X(tmp)); - ureg_MUL(ureg, tmp_x, _X(tmp), _Z(tmp)); - ureg_MUL(ureg, tmp_x, _X(tmp), _WWWW(_CONST(100))); - ureg_MAX(ureg, tmp_x, _X(tmp), _XXXX(cPsz1)); - ureg_MIN(ureg, tmp_z, _X(tmp), _YYYY(cPsz1)); - } - if (key->vertexpointsize || key->pointscale) ureg_MOV(ureg, oPsz, _Z(tmp)); + ureg_release_temporary(ureg, tmp); + } for (i = 0; i < 8; ++i) { + struct ureg_dst tmp, tmp_x; struct ureg_dst oTex, input_coord, transformed, t; unsigned c, writemask; const unsigned tci = (key->tc_gen >> (i * 3)) & 0x7; @@ -584,8 +596,10 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs) if (tci == NINED3DTSS_TCI_DISABLE) continue; oTex = ureg_DECL_output(ureg, texcoord_sn, i); - input_coord = r[5]; - transformed = r[6]; + tmp = ureg_DECL_temporary(ureg); + tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X); + input_coord = ureg_DECL_temporary(ureg); + transformed = ureg_DECL_temporary(ureg); /* Get the coordinate */ switch (tci) { @@ -596,21 +610,21 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs) ureg_MOV(ureg, input_coord, vs->aTex[idx]); break; case NINED3DTSS_TCI_CAMERASPACENORMAL: - ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_XYZ), ureg_src(rNrm)); + ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_XYZ), vs->aNrm); ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f)); dim_input = 4; break; case NINED3DTSS_TCI_CAMERASPACEPOSITION: - ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_XYZ), ureg_src(rVtx)); + ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_XYZ), vs->aVtx); 
ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f)); dim_input = 4; break; case NINED3DTSS_TCI_CAMERASPACEREFLECTIONVECTOR: tmp.WriteMask = TGSI_WRITEMASK_XYZ; - ureg_DP3(ureg, tmp_x, ureg_src(rVtx), ureg_src(rNrm)); - ureg_MUL(ureg, tmp, ureg_src(rNrm), _X(tmp)); + ureg_DP3(ureg, tmp_x, vs->aVtx, vs->aNrm); + ureg_MUL(ureg, tmp, vs->aNrm, _X(tmp)); ureg_ADD(ureg, tmp, ureg_src(tmp), ureg_src(tmp)); - ureg_SUB(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_XYZ), ureg_src(rVtx), ureg_src(tmp)); + ureg_SUB(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_XYZ), vs->aVtx, ureg_src(tmp)); ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f)); dim_input = 4; tmp.WriteMask = TGSI_WRITEMASK_XYZW; @@ -627,6 +641,7 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs) /* dim_output == 0 => do not transform the components. * XYZRHW also disables transformation */ if (!dim_output || key->position_t) { + ureg_release_temporary(ureg, transformed); transformed = input_coord; writemask = TGSI_WRITEMASK_XYZW; } else { @@ -648,9 +663,12 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs) } } writemask = (1 << dim_output) - 1; + ureg_release_temporary(ureg, input_coord); } ureg_MOV(ureg, ureg_writemask(oTex, writemask), ureg_src(transformed)); + ureg_release_temporary(ureg, transformed); + ureg_release_temporary(ureg, tmp); } /* === Lighting: @@ -695,18 +713,22 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs) * specular += light.specular * atten * powFact; */ if (key->lighting) { - struct ureg_dst rAtt = ureg_writemask(r[1], TGSI_WRITEMASK_W); - struct ureg_dst rHit = ureg_writemask(r[3], TGSI_WRITEMASK_XYZ); - struct ureg_dst rMid = ureg_writemask(r[4], TGSI_WRITEMASK_XYZ); + struct ureg_dst tmp = ureg_DECL_temporary(ureg); + struct ureg_dst tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X); + struct ureg_dst tmp_y = ureg_writemask(tmp, TGSI_WRITEMASK_Y); + 
struct ureg_dst tmp_z = ureg_writemask(tmp, TGSI_WRITEMASK_Z); + struct ureg_dst rAtt = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_W); + struct ureg_dst rHit = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZ); + struct ureg_dst rMid = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZ); - struct ureg_dst rCtr = ureg_writemask(r[2], TGSI_WRITEMASK_W); + struct ureg_dst rCtr = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_W); struct ureg_dst AL = ureg_writemask(AR, TGSI_WRITEMASK_X); /* Light.*.Alpha is not used. */ - struct ureg_dst rD = ureg_writemask(r[5], TGSI_WRITEMASK_XYZ); - struct ureg_dst rA = ureg_writemask(r[6], TGSI_WRITEMASK_XYZ); - struct ureg_dst rS = ureg_writemask(r[7], TGSI_WRITEMASK_XYZ); + struct ureg_dst rD = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZ); + struct ureg_dst rA = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZ); + struct ureg_dst rS = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZ); struct ureg_src mtlP = _XXXX(MATERIAL_CONST(4)); @@ -750,7 +772,7 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs) /* hitDir = light.position - eyeVtx * d = length(hitDir) */ - ureg_SUB(ureg, rHit, cLPos, ureg_src(rVtx)); + ureg_SUB(ureg, rHit, cLPos, vs->aVtx); ureg_DP3(ureg, tmp_x, ureg_src(rHit), ureg_src(rHit)); ureg_RSQ(ureg, tmp_y, _X(tmp)); ureg_MUL(ureg, tmp_x, _X(tmp), _Y(tmp)); /* length */ @@ -767,7 +789,7 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs) ureg_ENDIF(ureg); /* normalize hitDir */ - ureg_normalize3(ureg, rHit, ureg_src(rHit), tmp); + ureg_normalize3(ureg, rHit, ureg_src(rHit)); /* if (SPOT light) */ ureg_SEQ(ureg, tmp_x, cLKind, ureg_imm1f(ureg, D3DLIGHT_SPOT)); @@ -796,19 +818,19 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs) ureg_ENDIF(ureg); /* directional factors, let's not use LIT because of clarity */ - ureg_DP3(ureg, ureg_saturate(tmp_x), ureg_src(rNrm), 
ureg_src(rHit)); + ureg_DP3(ureg, ureg_saturate(tmp_x), vs->aNrm, ureg_src(rHit)); ureg_MOV(ureg, tmp_y, ureg_imm1f(ureg, 0.0f)); ureg_IF(ureg, _X(tmp), &label[l++]); { /* midVec = normalize(hitDir + eyeDir) */ if (key->localviewer) { - ureg_normalize3(ureg, rMid, ureg_src(rVtx), tmp); + ureg_normalize3(ureg, rMid, vs->aVtx); ureg_SUB(ureg, rMid, ureg_src(rHit), ureg_src(rMid)); } else { ureg_SUB(ureg, rMid, ureg_src(rHit), ureg_imm3f(ureg, 0.0f, 0.0f, 1.0f)); } - ureg_normalize3(ureg, rMid, ureg_src(rMid), tmp); - ureg_DP3(ureg, ureg_saturate(tmp_y), ureg_src(rNrm), ureg_src(rMid)); + ureg_normalize3(ureg, rMid, ureg_src(rMid)); + ureg_DP3(ureg, ureg_saturate(tmp_y), vs->aNrm, ureg_src(rMid)); ureg_POW(ureg, tmp_y, _Y(tmp), mtlP); ureg_MUL(ureg, tmp_x, _W(rAtt), _X(tmp)); /* dp3(normal,hitDir) * att */ @@ -862,15 +884,26 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs) ureg_MAD(ureg, oCol[0], ureg_src(rD), vs->mtlD, ureg_src(tmp)); } ureg_MUL(ureg, oCol[1], ureg_src(rS), vs->mtlS); + ureg_release_temporary(ureg, rAtt); + ureg_release_temporary(ureg, rHit); + ureg_release_temporary(ureg, rMid); + ureg_release_temporary(ureg, rCtr); + ureg_release_temporary(ureg, rD); + ureg_release_temporary(ureg, rA); + ureg_release_temporary(ureg, rS); + ureg_release_temporary(ureg, rAtt); + ureg_release_temporary(ureg, tmp); } else /* COLOR */ if (key->darkness) { if (key->mtl_emissive == 0 && key->mtl_ambient == 0) { ureg_MAD(ureg, oCol[0], vs->mtlD, ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 1.0f), _CONST(19)); } else { + struct ureg_dst tmp = ureg_DECL_temporary(ureg); ureg_MAD(ureg, ureg_writemask(oCol[0], TGSI_WRITEMASK_XYZ), vs->mtlA, _CONST(25), vs->mtlE); ureg_ADD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_W), vs->mtlA, vs->mtlE); ureg_ADD(ureg, ureg_writemask(oCol[0], TGSI_WRITEMASK_W), vs->mtlD, _W(tmp)); + ureg_release_temporary(ureg, tmp); } ureg_MUL(ureg, oCol[1], ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 1.0f), vs->mtlS); } else { @@ -883,15 +916,18 @@ 
nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs) * exp(x) = ex2(log2(e) * x) */ if (key->fog_mode) { + struct ureg_dst tmp = ureg_DECL_temporary(ureg); + struct ureg_dst tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X); + struct ureg_dst tmp_z = ureg_writemask(tmp, TGSI_WRITEMASK_Z); if (key->position_t) { ureg_MOV(ureg, ureg_saturate(tmp_x), ureg_scalar(vs->aCol[1], TGSI_SWIZZLE_W)); } else if (key->fog_range) { - ureg_DP3(ureg, tmp_x, ureg_src(rVtx), ureg_src(rVtx)); + ureg_DP3(ureg, tmp_x, vs->aVtx, vs->aVtx); ureg_RSQ(ureg, tmp_z, _X(tmp)); ureg_MUL(ureg, tmp_z, _Z(tmp), _X(tmp)); } else { - ureg_MOV(ureg, tmp_z, ureg_abs(_Z(rVtx))); + ureg_MOV(ureg, tmp_z, ureg_abs(_ZZZZ(vs->aVtx))); } if (key->fog_mode == D3DFOG_EXP) { @@ -910,6 +946,7 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs) ureg_MUL(ureg, ureg_saturate(tmp_x), _X(tmp), _YYYY(_CONST(28))); } ureg_MOV(ureg, oFog, _X(tmp)); + ureg_release_temporary(ureg, tmp); } else if (key->fog && !(key->passthrough & (1 << NINE_DECLUSAGE_FOG))) { ureg_MOV(ureg, oFog, ureg_scalar(vs->aCol[1], TGSI_SWIZZLE_W)); } -- 2.30.2