/* Writemasks */
#define R500_WRITEMASK_G 0x2
#define R500_WRITEMASK_A 0x8
+#define R500_WRITEMASK_AR 0x9
+#define R500_WRITEMASK_AG 0xA
+#define R500_WRITEMASK_ARG 0xB
+#define R500_WRITEMASK_AB 0xC
#define R500_WRITEMASK_ARGB 0xF
/* 1/(2pi), needed for quick modulus in trig insts
| R500_ALU_RGBA_ADDRD(dest);
break;
case OPCODE_LIT:
- /* I think I've got a pretty good path through this.
- * MAX temp1, tmp, [0, 0, 0, -128];
- * MIN temp1.w, temp1.w, [128];
- * POW temp1.z, temp1.y, temp1.w; (3 insts)
- * MOV result.xyzw, [1, temp1.y, temp1.z, 1]; */
+ /* To be honest, I have no idea how I came up with the following.
+ * All I know is that it's based on the r3xx stuff, and was
+ * concieved with the help of NyQuil. Mmm, MyQuil. */
+
+ /* First instruction */
src[0] = make_src(fp, fpi->SrcReg[0]);
src[1] = emit_const4fv(fp, LIT);
fp->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT
- | (R500_WRITEMASK_ARGB << 11);
+ | (R500_WRITEMASK_ARG << 11);
fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) | R500_RGB_ADDR1(src[1]);
fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) | R500_ALPHA_ADDR1(src[1]);
fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
| MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
- | R500_ALU_RGB_SEL_B_SRC1
| MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ZERO);
fp->inst[counter].inst4 = R500_ALPHA_OP_MAX
| R500_ALPHA_ADDRD(get_temp(fp, 0))
fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAX
| R500_ALU_RGBA_ADDRD(get_temp(fp, 0));
counter++;
- fp->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_A << 11);
+ /* Second instruction */
+ fp->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_AB << 11);
fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0));
fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0)) | R500_ALPHA_ADDR1(src[1]);
+ /* Select [z, z, z, y] */
+ temp_swiz = 2 | (2 << 3) | (2 << 6);
fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
- | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB)
+ | MAKE_SWIZ_RGB_A(temp_swiz)
| R500_ALU_RGB_SEL_B_SRC0
| MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB);
- fp->inst[counter].inst4 = R500_ALPHA_OP_MAX
+ fp->inst[counter].inst4 = R500_ALPHA_OP_LN2
| R500_ALPHA_ADDRD(get_temp(fp, 0))
- | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A
- | R500_ALPHA_SEL_B_SRC1 | R500_ALPHA_SWIZ_B_A | R500_ALPHA_MOD_B_NEG;
- fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAX
+ | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_G;
+ fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MIN
| R500_ALU_RGBA_ADDRD(get_temp(fp, 0));
counter++;
- fp->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_ARGB << 11);
+ /* Third instruction */
+ fp->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_AG << 11);
fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0));
fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0));
- fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
- | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB);
- fp->inst[counter].inst4 = R500_ALPHA_OP_LN2
- | R500_ALPHA_ADDRD(get_temp(fp, 1))
- | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_G;
- fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
- | R500_ALU_RGBA_ADDRD(get_temp(fp, 1));
- counter++;
- fp->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_ARGB << 11);
- fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0))
- | R500_RGB_ADDR1(get_temp(fp, 1));
- fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0))
- | R500_ALPHA_ADDR1(get_temp(fp, 1));
- /* Select [w, w, w, w] */
- temp_swiz = 3 | (3 << 3) | (3 << 6);
+ /* Select [x, x, x, z] */
+ temp_swiz = 0;
fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
| MAKE_SWIZ_RGB_A(temp_swiz)
- | R500_ALU_RGB_SEL_B_SRC1
- | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB);
+ | R500_ALU_RGB_SEL_B_SRC0
+ | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ONE);
fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
| R500_ALPHA_ADDRD(get_temp(fp, 1))
- | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A
- | R500_ALPHA_SEL_B_SRC1 | R500_ALPHA_SWIZ_B_A;
+ | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_B
+ | R500_ALPHA_SEL_B_SRC0 | R500_ALPHA_SWIZ_B_A;
fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
| R500_ALU_RGBA_ADDRD(get_temp(fp, 1))
| MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
- | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
+ | R500_ALU_RGBA_A_SWIZ_0;
counter++;
- fp->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_G << 11);
- fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 1));
- fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 1));
- fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
- | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB);
+ /* Fourth instruction */
+ fp->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_AR << 11);
+ fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0));
+ fp->inst[counter].inst3 = MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE)
+ | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ONE);
fp->inst[counter].inst4 = R500_ALPHA_OP_EX2
| R500_ALPHA_ADDRD(get_temp(fp, 0))
| R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A;
- fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
- | R500_ALU_RGBA_ADDRD(get_temp(fp, 0));
+ fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
+ | R500_ALU_RGBA_ADDRD(get_temp(fp, 0))
+ | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
+ | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
counter++;
- emit_alu(fp, counter, fpi);
+ /* Fifth instruction */
+ fp->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_AB << 11);
fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0));
- fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0));
- /* Select [1, y, z, 1] */
- temp_swiz = R500_SWIZZLE_ONE | (2 << 3) | (3 << 6);
+ /* Select [w, w, w] */
+ temp_swiz = 3 | (3 << 3) | (3 << 6);
fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
- | MAKE_SWIZ_RGB_A(temp_swiz)
+ | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ZERO)
| R500_ALU_RGB_SEL_B_SRC0
- | MAKE_SWIZ_RGB_B(temp_swiz)
- | R500_ALU_RGB_OMOD_DISABLE;
- fp->inst[counter].inst4 |= R500_ALPHA_OP_CMP
- | R500_ALPHA_ADDRD(dest)
- | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_1
- | R500_ALPHA_SEL_B_SRC0 | R500_ALPHA_SWIZ_B_1
- | R500_ALPHA_OMOD_DISABLE;
+ | MAKE_SWIZ_RGB_B(temp_swiz);
+ fp->inst[counter].inst4 |= R500_ALPHA_OP_MAD
+ | R500_ALPHA_ADDRD(get_temp(fp, 0))
+ | R500_ALPHA_SWIZ_A_1
+ | R500_ALPHA_SWIZ_B_1;
+ /* Select [-y, -y, -y] */
+ temp_swiz = 1 | (1 << 3) | (1 << 6);
fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP
- | R500_ALU_RGBA_ADDRD(dest)
- | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
+ | R500_ALU_RGBA_ADDRD(get_temp(fp, 0))
+ | MAKE_SWIZ_RGBA_C(temp_swiz)
+ | R500_ALU_RGBA_MOD_C_NEG
| MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
+ counter++;
+ /* Final instruction */
+ emit_alu(fp, counter, fpi);
+ fpi->SrcReg[0].Index = get_temp(fp, 0);
+ fpi->SrcReg[0].Swizzle = 1672;
+ emit_mov(fp, counter, fpi->SrcReg[0], dest);
break;
case OPCODE_LRP:
/* src0 * src1 + INV(src0) * src2