fp->translated = GL_TRUE;
}
-/* static void emit_alu(struct r500_fragment_program *fp) {
- * } */
+/**
+ * Initialise the first instruction word (inst0) of instruction slot
+ * `counter` for an arithmetic operation.
+ *
+ * inst0 is assigned, not OR-ed, so any bits previously stored in it are
+ * discarded; callers that need extra flags must OR them in afterwards.
+ *
+ * \param fp      fragment program whose inst[] array is being filled in
+ * \param counter index of the instruction slot to initialise
+ * \param fpi     instruction being translated; only DstReg.File and
+ *                DstReg.WriteMask are read
+ */
+static void emit_alu(struct r500_fragment_program *fp, int counter, struct prog_instruction *fpi) {
+	/* PROGRAM_OUTPUT names a register file, so it has to be compared with
+	 * DstReg.File. DstReg.Index is only the register number inside that
+	 * file and would select the wrong instruction type. */
+	if (fpi->DstReg.File == PROGRAM_OUTPUT) {
+		fp->inst[counter].inst0 = R500_INST_TYPE_OUT
+			/* output_mask */
+			| (fpi->DstReg.WriteMask << 14);
+	} else {
+		fp->inst[counter].inst0 = R500_INST_TYPE_ALU
+			/* pixel_mask */
+			| (fpi->DstReg.WriteMask << 11);
+	}
+
+	/* Set unconditionally for both instruction types. */
+	fp->inst[counter].inst0 |= R500_INST_TEX_SEM_WAIT;
+}
static void emit_mov(struct r500_fragment_program *fp, int counter, struct prog_src_register src, GLuint dest) {
/* The r3xx shader uses MAD to implement MOV. We are using CMP, since
* it is technically more accurate and recommended by ATI/AMD. */
GLuint src_reg = make_src(fp, src);
- fp->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT;
fp->inst[counter].inst1 = R500_RGB_ADDR0(src_reg);
fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src_reg);
fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
const struct prog_instruction *inst = mp->Base.Instructions;
struct prog_instruction *fpi;
GLuint src[3], dest, temp[2];
- int flags, pixel_mask = 0, output_mask = 0, counter = 0, temp_pixel_mask = 0;
+ int flags, pixel_mask = 0, output_mask = 0, counter = 0;
if (!inst || inst[0].Opcode == OPCODE_END) {
ERROR("The program is empty!\n");
switch (fpi->Opcode) {
case OPCODE_ABS:
+ emit_alu(fp, counter, fpi);
emit_mov(fp, counter, fpi->SrcReg[0], dest);
- fp->inst[counter].inst0 |= pixel_mask;
fp->inst[counter].inst3 |= R500_ALU_RGB_MOD_A_ABS
| R500_ALU_RGB_MOD_B_ABS;
fp->inst[counter].inst4 |= R500_ALPHA_MOD_A_ABS
src[0] = make_src(fp, fpi->SrcReg[0]);
src[1] = make_src(fp, fpi->SrcReg[1]);
/* Variation on MAD: 1*src0+src1 */
- fp->inst[counter].inst0 = R500_INST_TYPE_ALU
- | pixel_mask;
+ emit_alu(fp, counter, fpi);
fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
| R500_RGB_ADDR1(src[1]) | R500_RGB_ADDR2(0);
fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
src[0] = make_src(fp, fpi->SrcReg[0]);
src[1] = make_src(fp, fpi->SrcReg[1]);
src[2] = make_src(fp, fpi->SrcReg[2]);
- fp->inst[counter].inst0 = R500_INST_TYPE_ALU
- | pixel_mask;
+ emit_alu(fp, counter, fpi);
fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
| R500_RGB_ADDR1(src[1]) | R500_RGB_ADDR2(src[2]);
fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
break;
case OPCODE_COS:
src[0] = make_src(fp, fpi->SrcReg[0]);
- fp->inst[counter].inst0 = R500_INST_TYPE_ALU
- | R500_INST_TEX_SEM_WAIT | pixel_mask;
+ emit_alu(fp, counter, fpi);
fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
case OPCODE_DP3:
src[0] = make_src(fp, fpi->SrcReg[0]);
src[1] = make_src(fp, fpi->SrcReg[1]);
- fp->inst[counter].inst0 = R500_INST_TYPE_ALU
- | R500_INST_TEX_SEM_WAIT | pixel_mask;
+ emit_alu(fp, counter, fpi);
fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
| R500_RGB_ADDR1(src[1]);
fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
src[0] = make_src(fp, fpi->SrcReg[0]);
src[1] = make_src(fp, fpi->SrcReg[1]);
/* Based on DP3 */
- fp->inst[counter].inst0 = R500_INST_TYPE_ALU
- | R500_INST_TEX_SEM_WAIT | pixel_mask;
+ emit_alu(fp, counter, fpi);
fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
| R500_RGB_ADDR1(src[1]);
fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
src[0] = make_src(fp, fpi->SrcReg[0]);
src[1] = make_src(fp, fpi->SrcReg[1]);
/* Based on DP3 */
- fp->inst[counter].inst0 = R500_INST_TYPE_ALU
- | R500_INST_TEX_SEM_WAIT | pixel_mask;
+ emit_alu(fp, counter, fpi);
fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
| R500_RGB_ADDR1(src[1]);
fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
break;
case OPCODE_EX2:
src[0] = make_src(fp, fpi->SrcReg[0]);
- fp->inst[counter].inst0 = R500_INST_TYPE_ALU
- | R500_INST_TEX_SEM_WAIT | pixel_mask;
+ emit_alu(fp, counter, fpi);
fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
break;
case OPCODE_FRC:
src[0] = make_src(fp, fpi->SrcReg[0]);
- fp->inst[counter].inst0 = R500_INST_TYPE_ALU
- | R500_INST_TEX_SEM_WAIT | pixel_mask;
+ emit_alu(fp, counter, fpi);
fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
break;
case OPCODE_LG2:
src[0] = make_src(fp, fpi->SrcReg[0]);
- fp->inst[counter].inst0 = R500_INST_TYPE_ALU
- | R500_INST_TEX_SEM_WAIT | pixel_mask;
+ emit_alu(fp, counter, fpi);
fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
src[0] = make_src(fp, fpi->SrcReg[0]);
src[1] = make_src(fp, fpi->SrcReg[1]);
src[2] = make_src(fp, fpi->SrcReg[2]);
- fp->inst[counter].inst0 = R500_INST_TYPE_ALU
- | pixel_mask;
+ emit_alu(fp, counter, fpi);
fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
| R500_RGB_ADDR1(src[1]) | R500_RGB_ADDR2(src[2]);
fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
case OPCODE_MAX:
src[0] = make_src(fp, fpi->SrcReg[0]);
src[1] = make_src(fp, fpi->SrcReg[1]);
- fp->inst[counter].inst0 = R500_INST_TYPE_ALU | pixel_mask;
+ emit_alu(fp, counter, fpi);
fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) | R500_RGB_ADDR1(src[1]);
fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) | R500_ALPHA_ADDR1(src[1]);
fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
case OPCODE_MIN:
src[0] = make_src(fp, fpi->SrcReg[0]);
src[1] = make_src(fp, fpi->SrcReg[1]);
- fp->inst[counter].inst0 = R500_INST_TYPE_ALU | pixel_mask;
+ emit_alu(fp, counter, fpi);
fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) | R500_RGB_ADDR1(src[1]);
fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) | R500_ALPHA_ADDR1(src[1]);
fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
| R500_ALU_RGBA_ADDRD(dest);
break;
case OPCODE_MOV:
+ emit_alu(fp, counter, fpi);
emit_mov(fp, counter, fpi->SrcReg[0], dest);
- fp->inst[counter].inst0 |= pixel_mask;
break;
case OPCODE_MUL:
src[0] = make_src(fp, fpi->SrcReg[0]);
src[1] = make_src(fp, fpi->SrcReg[1]);
/* Variation on MAD: src0*src1+0 */
- fp->inst[counter].inst0 = R500_INST_TYPE_ALU
- | R500_INST_TEX_SEM_WAIT | pixel_mask;
+ emit_alu(fp, counter, fpi);
fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
| R500_RGB_ADDR1(src[1]);
fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
break;
case OPCODE_RCP:
src[0] = make_src(fp, fpi->SrcReg[0]);
- fp->inst[counter].inst0 = R500_INST_TYPE_ALU
- | R500_INST_TEX_SEM_WAIT | pixel_mask;
+ emit_alu(fp, counter, fpi);
fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
break;
case OPCODE_RSQ:
src[0] = make_src(fp, fpi->SrcReg[0]);
- fp->inst[counter].inst0 = R500_INST_TYPE_ALU
- | R500_INST_TEX_SEM_WAIT | pixel_mask;
+ emit_alu(fp, counter, fpi);
fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
/* Do a cosine, then a sine, masking out the channels we want to protect. */
src[0] = make_src(fp, fpi->SrcReg[0]);
/* Cosine only goes in R (x) channel. */
+ fpi->DstReg.WriteMask = 0x1;
+ emit_alu(fp, counter, fpi);
if (fpi->DstReg.File == PROGRAM_OUTPUT) {
fp->inst[counter].inst0 = R500_INST_TYPE_OUT
| R500_INST_TEX_SEM_WAIT | 0x1 << 14;
| R500_ALU_RGBA_ADDRD(dest);
counter++;
/* Sine only goes in G (y) channel. */
- if (fpi->DstReg.File == PROGRAM_OUTPUT) {
- fp->inst[counter].inst0 = R500_INST_TYPE_OUT | 0x2 << 14;
- } else {
- fp->inst[counter].inst0 = R500_INST_TYPE_ALU | 0x2 << 11;
- }
+ fpi->DstReg.WriteMask = 0x2;
+ emit_alu(fp, counter, fpi);
fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
| R500_ALU_RGBA_ADDRD(dest);
counter++;
/* Put 0 into B,A (z,w) channels. */
- if (fpi->DstReg.File == PROGRAM_OUTPUT) {
- fp->inst[counter].inst0 = R500_INST_TYPE_OUT | 0xC << 14;
- } else {
- fp->inst[counter].inst0 = R500_INST_TYPE_ALU | 0xC << 11;
- }
+ fpi->DstReg.WriteMask = 0xC;
+ emit_alu(fp, counter, fpi);
fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
break;
case OPCODE_SIN:
src[0] = make_src(fp, fpi->SrcReg[0]);
- fp->inst[counter].inst0 = R500_INST_TYPE_ALU
- | R500_INST_TEX_SEM_WAIT | pixel_mask;
+ emit_alu(fp, counter, fpi);
fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
src[0] = make_src(fp, fpi->SrcReg[0]);
src[1] = make_src(fp, fpi->SrcReg[1]);
/* Variation on MAD: 1*src0-src1 */
- fp->inst[counter].inst0 = R500_INST_TYPE_ALU
- | pixel_mask;
+ emit_alu(fp, counter, fpi);
fp->inst[counter].inst1 = R500_RGB_ADDR1(src[0])
| R500_RGB_ADDR2(src[1]);
fp->inst[counter].inst2 = R500_ALPHA_ADDR1(src[0])
break;
case OPCODE_SWZ:
/* TODO: Negation masks! */
+ emit_alu(fp, counter, fpi);
emit_mov(fp, counter, fpi->SrcReg[0], dest);
- fp->inst[counter].inst0 |= pixel_mask;
break;
case OPCODE_TEX:
emit_tex(fp, fpi, OPCODE_TEX, dest, counter);
if (fpi->SaturateMode == SATURATE_ZERO_ONE) {
fp->inst[counter].inst0 |= R500_INST_RGB_CLAMP | R500_INST_ALPHA_CLAMP;
}
- if (fpi->DstReg.File == PROGRAM_OUTPUT) {
- fp->inst[counter].inst0 |= R500_INST_TYPE_OUT | output_mask;
- }
counter++;
}
/* Finish him! (If it's an ALU/OUT instruction...) */
- if ((fp->inst[counter-1].inst0 & 0x3) <= 1) {
- fp->inst[counter-1].inst0 |= R500_INST_TYPE_OUT
- | R500_INST_TEX_SEM_WAIT | R500_INST_LAST;
+ if ((fp->inst[counter-1].inst0 & 0x3) == 1) {
+ fp->inst[counter-1].inst0 |= R500_INST_LAST;
} else {
/* We still need to put an output inst, right? */
fp->inst[counter].inst0 = R500_INST_TYPE_OUT
fp->inst[counter].inst1 = R500_RGB_ADDR0(dest);
fp->inst[counter].inst2 = R500_ALPHA_ADDR0(dest);
fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
- | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB)
+ | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB)
| R500_ALU_RGB_SEL_B_SRC0
| MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ONE);
fp->inst[counter].inst4 = R500_ALPHA_OP_MAD