+#include <float.h>
#include "pipe/p_context.h"
#include "pipe/p_defines.h"
#include "pipe/p_state.h"
case TGSI_OPCODE_LG2:
nvfx_fp_emit(fpc, arith(sat, LG2, dst, mask, src[0], none, none));
break;
-// case TGSI_OPCODE_LIT:
+ case TGSI_OPCODE_LIT:
+ if(!nvfx->is_nv4x)
+ nvfx_fp_emit(fpc, arith(sat, LIT_NV30, dst, mask, src[0], src[1], src[2]));
+ else {
+ /* we use FLT_MIN, so that log2 never gives -infinity, and thus multiplication by
+ * specular 0 always gives 0, so that ex2 gives 1, to satisfy the 0^0 = 1 requirement
+ *
+ * NOTE: if we start using half precision, we might need an fp16 FLT_MIN here instead
+ */
+ float maxv[4] = {0, FLT_MIN, 0, 0};
+ struct nvfx_src maxs = nvfx_src(constant(fpc, -1, maxv));
+ tmp = nvfx_src(temp(fpc));
+ if (ci>= 0 || ii >= 0) {
+ nvfx_fp_emit(fpc, arith(0, MOV, tmp.reg, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, maxs, none, none));
+ maxs = tmp;
+ }
+ nvfx_fp_emit(fpc, arith(0, MAX, tmp.reg, NVFX_FP_MASK_Y | NVFX_FP_MASK_W, swz(src[0], X, X, X, Y), swz(maxs, X, X, Y, Y), none));
+ nvfx_fp_emit(fpc, arith(0, LG2, tmp.reg, NVFX_FP_MASK_W, swz(tmp, W, W, W, W), none, none));
+ nvfx_fp_emit(fpc, arith(0, MUL, tmp.reg, NVFX_FP_MASK_W, swz(tmp, W, W, W, W), swz(src[0], W, W, W, W), none));
+ nvfx_fp_emit(fpc, arith(sat, LITEX2_NV40, dst, mask, swz(tmp, Y, Y, W, W), none, none));
+ }
+ break;
case TGSI_OPCODE_LRP:
if(!nvfx->is_nv4x)
nvfx_fp_emit(fpc, arith(sat, LRP_NV30, dst, mask, src[0], src[1], src[2]));