From 364d45a3e1629f32c6ab5407f92618a16c9d45e0 Mon Sep 17 00:00:00 2001 From: Nicolai Haehnle Date: Sat, 5 Jul 2008 16:07:37 +0200 Subject: [PATCH] r500: Major refactoring of fragment program emit Use the common facilities to convert non-native instructions into native ones. Worked hard to make the code easier to read (hopefully), by using helper functions instead of direct manipulation of the machine code. Fixes two bugs related to FLR and XPD. --- src/mesa/drivers/dri/r300/r300_reg.h | 6 + src/mesa/drivers/dri/r300/r500_fragprog.c | 9 +- .../drivers/dri/r300/r500_fragprog_emit.c | 1218 ++++++----------- 3 files changed, 458 insertions(+), 775 deletions(-) diff --git a/src/mesa/drivers/dri/r300/r300_reg.h b/src/mesa/drivers/dri/r300/r300_reg.h index 58a19554c71..cd232c5b7b2 100644 --- a/src/mesa/drivers/dri/r300/r300_reg.h +++ b/src/mesa/drivers/dri/r300/r300_reg.h @@ -2705,6 +2705,7 @@ enum { # define R500_ALPHA_OP_MDV 15 # define R500_ALPHA_ADDRD(x) (x << 4) # define R500_ALPHA_ADDRD_REL (1 << 11) +# define R500_ALPHA_SEL_A_SHIFT 12 # define R500_ALPHA_SEL_A_SRC0 (0 << 12) # define R500_ALPHA_SEL_A_SRC1 (1 << 12) # define R500_ALPHA_SEL_A_SRC2 (2 << 12) @@ -2721,6 +2722,7 @@ enum { # define R500_ALPHA_MOD_A_NEG (1 << 17) # define R500_ALPHA_MOD_A_ABS (2 << 17) # define R500_ALPHA_MOD_A_NAB (3 << 17) +# define R500_ALPHA_SEL_B_SHIFT 19 # define R500_ALPHA_SEL_B_SRC0 (0 << 19) # define R500_ALPHA_SEL_B_SRC1 (1 << 19) # define R500_ALPHA_SEL_B_SRC2 (2 << 19) @@ -2777,6 +2779,7 @@ enum { # define R500_ALU_RGBA_OP_MDV (12 << 0) # define R500_ALU_RGBA_ADDRD(x) (x << 4) # define R500_ALU_RGBA_ADDRD_REL (1 << 11) +# define R500_ALU_RGBA_SEL_C_SHIFT 12 # define R500_ALU_RGBA_SEL_C_SRC0 (0 << 12) # define R500_ALU_RGBA_SEL_C_SRC1 (1 << 12) # define R500_ALU_RGBA_SEL_C_SRC2 (2 << 12) @@ -2809,6 +2812,7 @@ enum { # define R500_ALU_RGBA_MOD_C_NEG (1 << 23) # define R500_ALU_RGBA_MOD_C_ABS (2 << 23) # define R500_ALU_RGBA_MOD_C_NAB (3 << 23) +# define 
R500_ALU_RGBA_ALPHA_SEL_C_SHIFT 25 # define R500_ALU_RGBA_ALPHA_SEL_C_SRC0 (0 << 25) # define R500_ALU_RGBA_ALPHA_SEL_C_SRC1 (1 << 25) # define R500_ALU_RGBA_ALPHA_SEL_C_SRC2 (2 << 25) @@ -2826,6 +2830,7 @@ enum { # define R500_ALU_RGBA_ALPHA_MOD_C_ABS (2 << 30) # define R500_ALU_RGBA_ALPHA_MOD_C_NAB (3 << 30) #define R500_US_ALU_RGB_INST_0 0xa000 +# define R500_ALU_RGB_SEL_A_SHIFT 0 # define R500_ALU_RGB_SEL_A_SRC0 (0 << 0) # define R500_ALU_RGB_SEL_A_SRC1 (1 << 0) # define R500_ALU_RGB_SEL_A_SRC2 (2 << 0) @@ -2858,6 +2863,7 @@ enum { # define R500_ALU_RGB_MOD_A_NEG (1 << 11) # define R500_ALU_RGB_MOD_A_ABS (2 << 11) # define R500_ALU_RGB_MOD_A_NAB (3 << 11) +# define R500_ALU_RGB_SEL_B_SHIFT 13 # define R500_ALU_RGB_SEL_B_SRC0 (0 << 13) # define R500_ALU_RGB_SEL_B_SRC1 (1 << 13) # define R500_ALU_RGB_SEL_B_SRC2 (2 << 13) diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index 62e06ea52c0..b46e924ac7b 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -27,6 +27,8 @@ #include "r500_fragprog.h" +#include "radeon_program_alu.h" + /** * Transform TEX, TXP, TXB, and KIL instructions in the following way: @@ -316,11 +318,12 @@ void r500TranslateFragmentShader(r300ContextPtr r300, insert_WPOS_trailer(&compiler); - struct radeon_program_transformation transformations[1] = { - { &transform_TEX, &compiler } + struct radeon_program_transformation transformations[2] = { + { &transform_TEX, &compiler }, + { &radeonTransformALU, 0 } }; radeonLocalTransform(r300->radeon.glCtx, compiler.program, - 1, transformations); + 2, transformations); if (RADEON_DEBUG & DEBUG_PIXEL) { _mesa_printf("Compiler: after all transformations:\n"); diff --git a/src/mesa/drivers/dri/r300/r500_fragprog_emit.c b/src/mesa/drivers/dri/r300/r500_fragprog_emit.c index 67545cbb4f6..0e95c81e482 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog_emit.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog_emit.c 
@@ -168,6 +168,12 @@ static const GLfloat LIT[] = {127.999999, 127.999999, -127.999999}; +static const struct prog_dst_register dstreg_template = { + .File = PROGRAM_TEMPORARY, + .Index = 0, + .WriteMask = WRITEMASK_XYZW +}; + static INLINE GLuint make_rgb_swizzle(struct prog_src_register src) { GLuint swiz = 0x0; GLuint temp; @@ -179,8 +185,14 @@ static INLINE GLuint make_rgb_swizzle(struct prog_src_register src) { if (temp == 5) temp++; swiz |= temp << i*3; } - if (src.NegateBase) - swiz |= (R500_SWIZ_MOD_NEG << 9); + if (src.Abs) { + swiz |= R500_SWIZ_MOD_ABS << 9; + } else if (src.NegateBase & 7) { + ASSERT((src.NegateBase & 7) == 7); + swiz |= R500_SWIZ_MOD_NEG << 9; + } + if (src.NegateAbs) + swiz ^= R500_SWIZ_MOD_NEG << 9; return swiz; } @@ -202,8 +214,13 @@ static INLINE GLuint make_alpha_swizzle(struct prog_src_register src) { if (swiz == 5) swiz++; - if (src.NegateBase) - swiz |= (R500_SWIZ_MOD_NEG << 3); + if (src.Abs) { + swiz |= R500_SWIZ_MOD_ABS << 3; + } else if (src.NegateBase & 8) { + swiz |= R500_SWIZ_MOD_NEG << 3; + } + if (src.NegateAbs) + swiz ^= R500_SWIZ_MOD_NEG << 3; return swiz; } @@ -212,6 +229,15 @@ static INLINE GLuint make_sop_swizzle(struct prog_src_register src) { GLuint swiz = GET_SWZ(src.Swizzle, 0); if (swiz == 5) swiz++; + + if (src.Abs) { + swiz |= R500_SWIZ_MOD_ABS << 3; + } else if (src.NegateBase & 1) { + swiz |= R500_SWIZ_MOD_NEG << 3; + } + if (src.NegateAbs) + swiz ^= R500_SWIZ_MOD_NEG << 3; + return swiz; } @@ -324,12 +350,23 @@ static GLuint make_dest(struct r500_pfs_compile_state *cs, struct prog_dst_regis return reg; } -static void emit_tex(struct r500_pfs_compile_state *cs, - struct prog_instruction *fpi, int dest, int counter) +static int emit_slot(struct r500_pfs_compile_state *cs) +{ + if (cs->nrslots >= 512) { + ERROR("Too many instructions"); + cs->nrslots = 1; + return 0; + } + return cs->nrslots++; +} + +static int emit_tex(struct r500_pfs_compile_state *cs, + struct prog_instruction *fpi, int dest) { PROG_CODE; 
int hwsrc, hwdest; GLuint mask; + int counter = emit_slot(cs); mask = fpi->DstReg.WriteMask << 11; hwsrc = make_src(cs, fpi->SrcReg[0]); @@ -399,844 +436,490 @@ static void emit_tex(struct r500_pfs_compile_state *cs, | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); } + + return counter; } -static void emit_alu(struct r500_pfs_compile_state *cs, int counter, struct prog_instruction *fpi) { +/* Do not call directly */ +static int _helper_emit_alu(struct r500_pfs_compile_state *cs, GLuint rgbop, GLuint alphaop, + int File, int Index, int WriteMask) +{ PROG_CODE; - /* Ideally, we shouldn't have to explicitly clear memory here! */ - code->inst[counter].inst0 = 0x0; - code->inst[counter].inst1 = 0x0; - code->inst[counter].inst2 = 0x0; - code->inst[counter].inst3 = 0x0; - code->inst[counter].inst4 = 0x0; - code->inst[counter].inst5 = 0x0; + int counter = emit_slot(cs); - if (fpi->DstReg.File == PROGRAM_OUTPUT) { - code->inst[counter].inst0 = R500_INST_TYPE_OUT; + code->inst[counter].inst4 = alphaop; + code->inst[counter].inst5 = rgbop; - if (fpi->DstReg.Index == FRAG_RESULT_COLR) - code->inst[counter].inst0 |= (fpi->DstReg.WriteMask << 15); + if (File == PROGRAM_OUTPUT) { + code->inst[counter].inst0 = R500_INST_TYPE_OUT; - if (fpi->DstReg.Index == FRAG_RESULT_DEPR) { + if (Index == FRAG_RESULT_COLR) { + code->inst[counter].inst0 |= WriteMask << 15; + } else if (Index == FRAG_RESULT_DEPR) { code->inst[counter].inst4 |= R500_ALPHA_W_OMASK; - /* Notify the state emission! 
*/ cs->compiler->fp->writes_depth = GL_TRUE; } } else { + int dest = Index + code->temp_reg_offset; + code->inst[counter].inst0 = R500_INST_TYPE_ALU - /* pixel_mask */ - | (fpi->DstReg.WriteMask << 11); + | (WriteMask << 11); + code->inst[counter].inst4 |= R500_ALPHA_ADDRD(dest); + code->inst[counter].inst5 |= R500_ALU_RGBA_ADDRD(dest); } code->inst[counter].inst0 |= R500_INST_TEX_SEM_WAIT; + + return counter; } -static void emit_mov(struct r500_pfs_compile_state *cs, int counter, struct prog_instruction *fpi, GLuint src_reg, GLuint swizzle, GLuint dest) { +/** + * Prepare an ALU slot with the given RGB operation, ALPHA operation, and + * destination register. + */ +static int emit_alu(struct r500_pfs_compile_state *cs, GLuint rgbop, GLuint alphaop, struct prog_dst_register dst) +{ + return _helper_emit_alu(cs, rgbop, alphaop, dst.File, dst.Index, dst.WriteMask); +} + +static int emit_alu_temp(struct r500_pfs_compile_state *cs, GLuint rgbop, GLuint alphaop, int dst, int writemask) +{ + return _helper_emit_alu(cs, rgbop, alphaop, + PROGRAM_TEMPORARY, dst - cs->compiler->code->temp_reg_offset, writemask); +} + +/** + * Set an instruction's source 0 (both RGB and ALPHA) to the given hardware index. + */ +static void set_src0_direct(struct r500_pfs_compile_state *cs, int ip, GLuint src) +{ PROG_CODE; - /* The r3xx shader uses MAD to implement MOV. We are using CMP, since - * it is technically more accurate and recommended by ATI/AMD. */ - emit_alu(cs, counter, fpi); - code->inst[counter].inst1 = R500_RGB_ADDR0(src_reg); - code->inst[counter].inst2 = R500_ALPHA_ADDR0(src_reg); - /* (De)mangle the swizzle from Mesa to R500. */ - swizzle = make_rgba_swizzle(swizzle); - /* 0x1FF is 9 bits, size of an RGB swizzle. 
*/ - code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | MAKE_SWIZ_RGB_A((swizzle & 0x1ff)) - | R500_ALU_RGB_SEL_B_SRC0 - | MAKE_SWIZ_RGB_B((swizzle & 0x1ff)) - | R500_ALU_RGB_OMOD_DISABLE; - code->inst[counter].inst4 |= R500_ALPHA_OP_CMP - | R500_ALPHA_ADDRD(dest) - | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(GET_SWZ(swizzle, 3)) - | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(GET_SWZ(swizzle, 3)) - | R500_ALPHA_OMOD_DISABLE; - code->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP - | R500_ALU_RGBA_ADDRD(dest) - | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) - | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); + code->inst[ip].inst1 |= R500_RGB_ADDR0(src); + code->inst[ip].inst2 |= R500_ALPHA_ADDR0(src); } -static void emit_mad(struct r500_pfs_compile_state *cs, int counter, struct prog_instruction *fpi, int one, int two, int three) { +/** + * Set an instruction's source 1 (both RGB and ALPHA) to the given hardware index. + */ +static void set_src1_direct(struct r500_pfs_compile_state *cs, int ip, GLuint src) +{ PROG_CODE; - /* Note: This code was all Corbin's. Corbin is a rather hackish coder. - * If you can make it pretty or fast, please do so! 
*/ - emit_alu(cs, counter, fpi); - /* Common MAD stuff */ - code->inst[counter].inst4 |= R500_ALPHA_OP_MAD - | R500_ALPHA_ADDRD(make_dest(cs, fpi->DstReg)); - code->inst[counter].inst5 |= R500_ALU_RGBA_OP_MAD - | R500_ALU_RGBA_ADDRD(make_dest(cs, fpi->DstReg)); - switch (one) { - case 0: - case 1: - case 2: - code->inst[counter].inst1 |= R500_RGB_ADDR0(make_src(cs, fpi->SrcReg[one])); - code->inst[counter].inst2 |= R500_ALPHA_ADDR0(make_src(cs, fpi->SrcReg[one])); - code->inst[counter].inst3 |= R500_ALU_RGB_SEL_A_SRC0 - | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[one])); - code->inst[counter].inst4 |= R500_ALPHA_SEL_A_SRC0 - | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[one])); - break; - case R500_SWIZZLE_ZERO: - code->inst[counter].inst3 |= MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ZERO); - code->inst[counter].inst4 |= MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ZERO); - break; - case R500_SWIZZLE_ONE: - code->inst[counter].inst3 |= MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE); - code->inst[counter].inst4 |= MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE); - break; - default: - ERROR("Bad src index in emit_mad: %d\n", one); - break; - } - switch (two) { - case 0: - case 1: - case 2: - code->inst[counter].inst1 |= R500_RGB_ADDR1(make_src(cs, fpi->SrcReg[two])); - code->inst[counter].inst2 |= R500_ALPHA_ADDR1(make_src(cs, fpi->SrcReg[two])); - code->inst[counter].inst3 |= R500_ALU_RGB_SEL_B_SRC1 - | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[two])); - code->inst[counter].inst4 |= R500_ALPHA_SEL_B_SRC1 - | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[two])); - break; - case R500_SWIZZLE_ZERO: - code->inst[counter].inst3 |= MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ZERO); - code->inst[counter].inst4 |= MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ZERO); - break; - case R500_SWIZZLE_ONE: - code->inst[counter].inst3 |= MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ONE); - code->inst[counter].inst4 |= MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ONE); - break; - default: - ERROR("Bad src index in emit_mad: %d\n", two); - break; - } - switch (three) { - case 
0: - case 1: - case 2: - code->inst[counter].inst1 |= R500_RGB_ADDR2(make_src(cs, fpi->SrcReg[three])); - code->inst[counter].inst2 |= R500_ALPHA_ADDR2(make_src(cs, fpi->SrcReg[three])); - code->inst[counter].inst5 |= R500_ALU_RGBA_SEL_C_SRC2 - | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[three])) - | R500_ALU_RGBA_ALPHA_SEL_C_SRC2 - | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[three])); - break; - case R500_SWIZZLE_ZERO: - code->inst[counter].inst5 |= MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) - | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); - break; - case R500_SWIZZLE_ONE: - code->inst[counter].inst5 |= MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ONE) - | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ONE); - break; - default: - ERROR("Bad src index in emit_mad: %d\n", three); - break; - } + code->inst[ip].inst1 |= R500_RGB_ADDR1(src); + code->inst[ip].inst2 |= R500_ALPHA_ADDR1(src); } -static void emit_sop(struct r500_pfs_compile_state *cs, int counter, struct prog_instruction *fpi, int opcode, GLuint src, GLuint swiz, GLuint dest) { +/** + * Set an instruction's source 2 (both RGB and ALPHA) to the given hardware index. 
+ */ +static void set_src2_direct(struct r500_pfs_compile_state *cs, int ip, GLuint src) +{ PROG_CODE; - emit_alu(cs, counter, fpi); - code->inst[counter].inst1 = R500_RGB_ADDR0(src); - code->inst[counter].inst2 = R500_ALPHA_ADDR0(src); - code->inst[counter].inst4 |= R500_ALPHA_ADDRD(dest) - | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(swiz); - code->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP - | R500_ALU_RGBA_ADDRD(dest); - switch (opcode) { - case OPCODE_COS: - code->inst[counter].inst4 |= R500_ALPHA_OP_COS; - break; - case OPCODE_EX2: - code->inst[counter].inst4 |= R500_ALPHA_OP_EX2; - break; - case OPCODE_LG2: - code->inst[counter].inst4 |= R500_ALPHA_OP_LN2; - break; - case OPCODE_RCP: - code->inst[counter].inst4 |= R500_ALPHA_OP_RCP; - break; - case OPCODE_RSQ: - code->inst[counter].inst4 |= R500_ALPHA_OP_RSQ; - break; - case OPCODE_SIN: - code->inst[counter].inst4 |= R500_ALPHA_OP_SIN; - break; - default: - ERROR("Bad opcode in emit_sop: %d\n", opcode); - break; + code->inst[ip].inst1 |= R500_RGB_ADDR2(src); + code->inst[ip].inst2 |= R500_ALPHA_ADDR2(src); +} + +/** + * Set an instruction's source 0 (both RGB and ALPHA) according to the given source register. + */ +static void set_src0(struct r500_pfs_compile_state *cs, int ip, struct prog_src_register srcreg) +{ + set_src0_direct(cs, ip, make_src(cs, srcreg)); +} + +/** + * Set an instruction's source 1 (both RGB and ALPHA) according to the given source register. + */ +static void set_src1(struct r500_pfs_compile_state *cs, int ip, struct prog_src_register srcreg) +{ + set_src1_direct(cs, ip, make_src(cs, srcreg)); +} + +/** + * Set an instruction's source 2 (both RGB and ALPHA) according to the given source register. 
+ */ +static void set_src2(struct r500_pfs_compile_state *cs, int ip, struct prog_src_register srcreg) +{ + set_src2_direct(cs, ip, make_src(cs, srcreg)); +} + +/** + * Set an instruction's argument A (both RGB and ALPHA) from the given source, + * taking swizzles+neg+abs as specified (see also _reg version below). + */ +static void set_argA(struct r500_pfs_compile_state *cs, int ip, int source, GLuint swizRGB, GLuint swizA) +{ + PROG_CODE; + code->inst[ip].inst3 |= (source << R500_ALU_RGB_SEL_A_SHIFT) | MAKE_SWIZ_RGB_A(swizRGB); + code->inst[ip].inst4 |= (source << R500_ALPHA_SEL_A_SHIFT) | MAKE_SWIZ_ALPHA_A(swizA); +} + +/** + * Set an instruction's argument B (both RGB and ALPHA) from the given source, + * taking swizzles+neg+abs as specified (see also _reg version below). + */ +static void set_argB(struct r500_pfs_compile_state *cs, int ip, int source, GLuint swizRGB, GLuint swizA) +{ + PROG_CODE; + code->inst[ip].inst3 |= (source << R500_ALU_RGB_SEL_B_SHIFT) | MAKE_SWIZ_RGB_B(swizRGB); + code->inst[ip].inst4 |= (source << R500_ALPHA_SEL_B_SHIFT) | MAKE_SWIZ_ALPHA_B(swizA); +} + +/** + * Set an instruction's argument C (both RGB and ALPHA) from the given source, + * taking swizzles+neg+abs as specified (see also _reg version below). + */ +static void set_argC(struct r500_pfs_compile_state *cs, int ip, int source, GLuint swizRGB, GLuint swizA) +{ + PROG_CODE; + code->inst[ip].inst5 |= + (source << R500_ALU_RGBA_SEL_C_SHIFT) | + MAKE_SWIZ_RGBA_C(swizRGB) | + (source << R500_ALU_RGBA_ALPHA_SEL_C_SHIFT) | + MAKE_SWIZ_ALPHA_C(swizA); +} + +/** + * Set an instruction's argument A (both RGB and ALPHA) from the given source, + * taking swizzles, negation and absolute value from the given source register. 
+ */ +static void set_argA_reg(struct r500_pfs_compile_state *cs, int ip, int source, struct prog_src_register srcreg) +{ + set_argA(cs, ip, source, make_rgb_swizzle(srcreg), make_alpha_swizzle(srcreg)); +} + +/** + * Set an instruction's argument B (both RGB and ALPHA) from the given source, + * taking swizzles, negation and absolute value from the given source register. + */ +static void set_argB_reg(struct r500_pfs_compile_state *cs, int ip, int source, struct prog_src_register srcreg) +{ + set_argB(cs, ip, source, make_rgb_swizzle(srcreg), make_alpha_swizzle(srcreg)); +} + +/** + * Set an instruction's argument C (both RGB and ALPHA) from the given source, + * taking swizzles, negation and absolute value from the given source register. + */ +static void set_argC_reg(struct r500_pfs_compile_state *cs, int ip, int source, struct prog_src_register srcreg) +{ + set_argC(cs, ip, source, make_rgb_swizzle(srcreg), make_alpha_swizzle(srcreg)); +} + +/** + * Emit a special scalar operation. + */ +static int emit_sop(struct r500_pfs_compile_state *cs, + int opcode, struct prog_dst_register dstreg, GLuint src, GLuint swiz) +{ + int ip = emit_alu(cs, R500_ALU_RGBA_OP_SOP, opcode, dstreg); + set_src0_direct(cs, ip, src); + set_argA(cs, ip, 0, R500_SWIZ_RGB_ZERO, swiz); + return ip; +} + + +/** + * Emit trigonometric function COS, SIN, SCS + */ +static void emit_trig(struct r500_pfs_compile_state *cs, struct prog_instruction *fpi) +{ + int ip; + struct prog_dst_register temp = dstreg_template; + temp.Index = get_temp(cs, 0); + temp.WriteMask = WRITEMASK_W; + + /* temp = Input*(1/2pi) */ + ip = emit_alu(cs, R500_ALU_RGBA_OP_MAD, R500_ALPHA_OP_MAD, temp); + set_src0(cs, ip, fpi->SrcReg[0]); + set_src1_direct(cs, ip, emit_const4fv(cs, RCP_2PI)); + set_argA(cs, ip, 0, R500_SWIZ_RGB_ZERO, make_sop_swizzle(fpi->SrcReg[0])); + set_argB(cs, ip, 1, R500_SWIZ_RGB_ZERO, SWIZZLE_W); + set_argC(cs, ip, 0, R500_SWIZ_RGB_ZERO, R500_SWIZZLE_ZERO); + + /* temp = frac(dst) */ + ip = 
emit_alu(cs, R500_ALU_RGBA_OP_FRC, R500_ALPHA_OP_FRC, temp); + set_src0_direct(cs, ip, temp.Index); + set_argA(cs, ip, 0, R500_SWIZ_RGB_RGB, SWIZZLE_W); + + /* Dest = trig(temp) */ + if (fpi->Opcode == OPCODE_COS) { + emit_sop(cs, R500_ALPHA_OP_COS, fpi->DstReg, temp.Index, SWIZZLE_W); + } else if (fpi->Opcode == OPCODE_SIN) { + emit_sop(cs, R500_ALPHA_OP_SIN, fpi->DstReg, temp.Index, SWIZZLE_W); + } else if (fpi->Opcode == OPCODE_SCS) { + struct prog_dst_register moddst = fpi->DstReg; /* copy of the destination whose write mask is narrowed per component: X receives COS, Y receives SIN */ + + if (fpi->DstReg.WriteMask & WRITEMASK_X) { + moddst.WriteMask = WRITEMASK_X; + emit_sop(cs, R500_ALPHA_OP_COS, moddst, temp.Index, SWIZZLE_W); /* write X only, so the SIN emitted below cannot overwrite it */ + } + if (fpi->DstReg.WriteMask & WRITEMASK_Y) { + moddst.WriteMask = WRITEMASK_Y; + emit_sop(cs, R500_ALPHA_OP_SIN, moddst, temp.Index, SWIZZLE_W); /* write Y only, leaving the COS result in X intact */ + } } } -static int do_inst(struct r500_pfs_compile_state *cs, struct prog_instruction *fpi, int counter) { +/** + * Emit a LIT instruction. + * + * Definition of LIT (from ARB_fragment_program): + * tmp = VectorLoad(op0); + * if (tmp.x < 0) tmp.x = 0; + * if (tmp.y < 0) tmp.y = 0; + * if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon); + * else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon; + * result.x = 1.0; + * result.y = tmp.x; + * result.z = (tmp.x > 0) ? 
RoughApproxPower(tmp.y, tmp.w) : 0.0; + * result.w = 1.0; + */ +static void emit_lit(struct r500_pfs_compile_state *cs, struct prog_instruction *fpi) +{ + GLuint cnst; + int needTemporary; + GLuint temp; + int ip; + + cnst = emit_const4fv(cs, LIT); + + needTemporary = 0; + if (fpi->DstReg.WriteMask != WRITEMASK_XYZW || fpi->DstReg.File == PROGRAM_OUTPUT) + needTemporary = 1; + + if (needTemporary) { + temp = get_temp(cs, 0); + } else { + temp = fpi->DstReg.Index; + } + + // MAX tmp.xyw, op0, { 0, 0, 0, -128+eps } + ip = emit_alu_temp(cs, R500_ALU_RGBA_OP_MAX, R500_ALPHA_OP_MAX, temp, WRITEMASK_XYW); + set_src0(cs, ip, fpi->SrcReg[0]); + set_src1_direct(cs, ip, cnst); + set_argA_reg(cs, ip, 0, fpi->SrcReg[0]); + set_argB(cs, ip, 1, R500_SWIZ_RGB_ZERO, SWIZZLE_W); + + // MIN tmp.z, tmp.w, { 128-eps } + // LG2 tmp.w, tmp.y + ip = emit_alu_temp(cs, R500_ALU_RGBA_OP_MIN, R500_ALPHA_OP_LN2, temp, WRITEMASK_ZW); + set_src0_direct(cs, ip, temp); + set_src1_direct(cs, ip, cnst); + set_argA(cs, ip, 0, SWIZZLE_W | (SWIZZLE_W<<3) | (SWIZZLE_W<<6), SWIZZLE_Y); + set_argB(cs, ip, 1, SWIZZLE_X | (SWIZZLE_X<<3) | (SWIZZLE_X<<6), SWIZZLE_X); + + // MOV tmp.y, tmp.x + // MUL tmp.w, tmp.z, tmp.w + ip = emit_alu_temp(cs, R500_ALU_RGBA_OP_MAD, R500_ALPHA_OP_MAD, temp, WRITEMASK_YW); + set_src0_direct(cs, ip, temp); + set_argA(cs, ip, 0, SWIZZLE_X | (SWIZZLE_X<<3) | (SWIZZLE_X<<6), SWIZZLE_Z); + set_argB(cs, ip, 0, R500_SWIZ_RGB_ONE, SWIZZLE_W); + set_argC(cs, ip, 0, R500_SWIZ_RGB_ZERO, R500_SWIZZLE_ZERO); + + // MOV tmp.x, 1.0 + // EX2 tmp.w, tmp.w + ip = emit_alu_temp(cs, R500_ALU_RGBA_OP_CMP, R500_ALPHA_OP_EX2, temp, WRITEMASK_XW); + set_src0_direct(cs, ip, temp); + set_argA(cs, ip, 0, R500_SWIZ_RGB_ONE, SWIZZLE_W); + set_argB(cs, ip, 0, R500_SWIZ_RGB_ONE, R500_SWIZZLE_ZERO); + set_argC(cs, ip, 0, R500_SWIZ_RGB_ZERO, R500_SWIZZLE_ZERO); + + // tmp.z := (-tmp.x >= 0) ? 
tmp.y : 0.0 + // MOV tmp.w, 1.0 + ip = emit_alu_temp(cs, R500_ALU_RGBA_OP_CMP, R500_ALPHA_OP_CMP, temp, WRITEMASK_ZW); + set_src0_direct(cs, ip, temp); + set_argA(cs, ip, 0, R500_SWIZ_RGB_ZERO, R500_SWIZZLE_ONE); /* RGB slot takes the three-component zero swizzle, as in every other set_arg call here; the scalar R500_SWIZZLE_ZERO belongs in the alpha slot only */ + set_argB(cs, ip, 0, SWIZZLE_W | (SWIZZLE_W<<3) | (SWIZZLE_W<<6), R500_SWIZZLE_ONE); + set_argC(cs, ip, 0, SWIZZLE_Y | (SWIZZLE_Y<<3) | (SWIZZLE_Y<<6) | (R500_SWIZ_MOD_NEG<<9), R500_SWIZZLE_ZERO); + + if (needTemporary) { + ip = emit_alu(cs, R500_ALU_RGBA_OP_CMP, R500_ALPHA_OP_CMP, fpi->DstReg); + set_src0_direct(cs, ip, temp); + set_argA(cs, ip, 0, R500_SWIZ_RGB_RGB, SWIZZLE_W); + set_argB(cs, ip, 1, R500_SWIZ_RGB_RGB, SWIZZLE_W); + set_argC(cs, ip, 0, R500_SWIZ_RGB_ZERO, R500_SWIZZLE_ZERO); + } +} + +static void do_inst(struct r500_pfs_compile_state *cs, struct prog_instruction *fpi) { PROG_CODE; GLuint src[3], dest = 0; - int temp_swiz = 0; + int ip; if (fpi->Opcode != OPCODE_KIL) { dest = make_dest(cs, fpi->DstReg); } switch (fpi->Opcode) { - case OPCODE_ABS: - emit_mov(cs, counter, fpi, make_src(cs, fpi->SrcReg[0]), fpi->SrcReg[0].Swizzle, dest); - code->inst[counter].inst3 |= R500_ALU_RGB_MOD_A_ABS - | R500_ALU_RGB_MOD_B_ABS; - code->inst[counter].inst4 |= R500_ALPHA_MOD_A_ABS - | R500_ALPHA_MOD_B_ABS; - break; case OPCODE_ADD: /* Variation on MAD: 1*src0+src1 */ - emit_mad(cs, counter, fpi, R500_SWIZZLE_ONE, 0, 1); + ip = emit_alu(cs, R500_ALU_RGBA_OP_MAD, R500_ALPHA_OP_MAD, fpi->DstReg); + set_src0(cs, ip, fpi->SrcReg[0]); + set_src1(cs, ip, fpi->SrcReg[1]); + set_argA(cs, ip, 0, R500_SWIZ_RGB_ONE, R500_SWIZZLE_ONE); + set_argB_reg(cs, ip, 0, fpi->SrcReg[0]); + set_argC_reg(cs, ip, 1, fpi->SrcReg[1]); break; case OPCODE_CMP: /* This inst's selects need to be swapped as follows: * 0 -> C ; 1 -> B ; 2 -> A */ - src[0] = make_src(cs, fpi->SrcReg[0]); - src[1] = make_src(cs, fpi->SrcReg[1]); - src[2] = make_src(cs, fpi->SrcReg[2]); - emit_alu(cs, counter, fpi); - code->inst[counter].inst1 = R500_RGB_ADDR0(src[2]) - | R500_RGB_ADDR1(src[1]) | 
R500_RGB_ADDR2(src[0]); - code->inst[counter].inst2 = R500_ALPHA_ADDR0(src[2]) - | R500_ALPHA_ADDR1(src[1]) | R500_ALPHA_ADDR2(src[0]); - code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[2])) - | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1])); - code->inst[counter].inst4 |= R500_ALPHA_OP_CMP - | R500_ALPHA_ADDRD(dest) - | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[2])) - | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1])); - code->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP - | R500_ALU_RGBA_ADDRD(dest) - | R500_ALU_RGBA_SEL_C_SRC2 - | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[0])) - | R500_ALU_RGBA_ALPHA_SEL_C_SRC2 - | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[0])); + ip = emit_alu(cs, R500_ALU_RGBA_OP_CMP, R500_ALPHA_OP_CMP, fpi->DstReg); + set_src0(cs, ip, fpi->SrcReg[0]); + set_src1(cs, ip, fpi->SrcReg[1]); + set_src2(cs, ip, fpi->SrcReg[2]); + set_argA_reg(cs, ip, 2, fpi->SrcReg[2]); + set_argB_reg(cs, ip, 1, fpi->SrcReg[1]); + set_argC_reg(cs, ip, 0, fpi->SrcReg[0]); break; case OPCODE_COS: - src[0] = make_src(cs, fpi->SrcReg[0]); - src[1] = emit_const4fv(cs, RCP_2PI); - code->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT - | (R500_WRITEMASK_ARGB << 11); - code->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) - | R500_RGB_ADDR1(src[1]); - code->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) - | R500_ALPHA_ADDR1(src[1]); - code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB) - | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB); - code->inst[counter].inst4 = R500_ALPHA_OP_MAD - | R500_ALPHA_ADDRD(get_temp(cs, 0)) - | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A - | R500_ALPHA_SEL_B_SRC1 | R500_ALPHA_SWIZ_B_A; - code->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD - | R500_ALU_RGBA_ADDRD(get_temp(cs, 0)) - | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) - | 
MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); - counter++; - code->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_ARGB << 11); - code->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(cs, 0)); - code->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(cs, 0)); - code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB); - code->inst[counter].inst4 = R500_ALPHA_OP_FRC - | R500_ALPHA_ADDRD(get_temp(cs, 1)) - | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A; - code->inst[counter].inst5 = R500_ALU_RGBA_OP_FRC - | R500_ALU_RGBA_ADDRD(get_temp(cs, 1)); - counter++; - emit_sop(cs, counter, fpi, OPCODE_COS, get_temp(cs, 1), make_sop_swizzle(fpi->SrcReg[0]), dest); + emit_trig(cs, fpi); break; case OPCODE_DP3: - src[0] = make_src(cs, fpi->SrcReg[0]); - src[1] = make_src(cs, fpi->SrcReg[1]); - emit_alu(cs, counter, fpi); - code->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) - | R500_RGB_ADDR1(src[1]); - code->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) - | R500_ALPHA_ADDR1(src[1]); - code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])) - | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1])); - code->inst[counter].inst4 |= R500_ALPHA_OP_DP - | R500_ALPHA_ADDRD(dest) - | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])) - | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1])); - code->inst[counter].inst5 = R500_ALU_RGBA_OP_DP3 - | R500_ALU_RGBA_ADDRD(dest); + ip = emit_alu(cs, R500_ALU_RGBA_OP_DP3, R500_ALPHA_OP_DP, fpi->DstReg); + set_src0(cs, ip, fpi->SrcReg[0]); + set_src1(cs, ip, fpi->SrcReg[1]); + set_argA_reg(cs, ip, 0, fpi->SrcReg[0]); + set_argB_reg(cs, ip, 1, fpi->SrcReg[1]); break; case OPCODE_DP4: - src[0] = make_src(cs, fpi->SrcReg[0]); - src[1] = make_src(cs, fpi->SrcReg[1]); - /* Based on DP3 */ - emit_alu(cs, counter, fpi); - code->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) - | R500_RGB_ADDR1(src[1]); - 
code->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) - | R500_ALPHA_ADDR1(src[1]); - code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])) - | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1])); - code->inst[counter].inst4 |= R500_ALPHA_OP_DP - | R500_ALPHA_ADDRD(dest) - | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])) - | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1])); - code->inst[counter].inst5 = R500_ALU_RGBA_OP_DP4 - | R500_ALU_RGBA_ADDRD(dest); - break; - case OPCODE_DPH: - src[0] = make_src(cs, fpi->SrcReg[0]); - src[1] = make_src(cs, fpi->SrcReg[1]); - /* Based on DP3 */ - emit_alu(cs, counter, fpi); - code->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) - | R500_RGB_ADDR1(src[1]); - code->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) - | R500_ALPHA_ADDR1(src[1]); - code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])) - | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1])); - code->inst[counter].inst4 |= R500_ALPHA_OP_DP - | R500_ALPHA_ADDRD(dest) - | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE) - | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1])); - code->inst[counter].inst5 = R500_ALU_RGBA_OP_DP4 - | R500_ALU_RGBA_ADDRD(dest); + ip = emit_alu(cs, R500_ALU_RGBA_OP_DP4, R500_ALPHA_OP_DP, fpi->DstReg); + set_src0(cs, ip, fpi->SrcReg[0]); + set_src1(cs, ip, fpi->SrcReg[1]); + set_argA_reg(cs, ip, 0, fpi->SrcReg[0]); + set_argB_reg(cs, ip, 1, fpi->SrcReg[1]); break; case OPCODE_DST: - src[0] = make_src(cs, fpi->SrcReg[0]); - src[1] = make_src(cs, fpi->SrcReg[1]); /* [1, src0.y*src1.y, src0.z, src1.w] - * So basically MUL with lotsa swizzling. 
*/ - emit_alu(cs, counter, fpi); - code->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) - | R500_RGB_ADDR1(src[1]); - code->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) - | R500_ALPHA_ADDR1(src[1]); - code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | R500_ALU_RGB_SEL_B_SRC1; - /* Select [1, y, z, 1] */ - temp_swiz = (make_rgb_swizzle(fpi->SrcReg[0]) & ~0x7) | R500_SWIZZLE_ONE; - code->inst[counter].inst3 |= MAKE_SWIZ_RGB_A(temp_swiz); - /* Select [1, y, 1, w] */ - temp_swiz = (make_rgb_swizzle(fpi->SrcReg[0]) & ~0x1c7) | R500_SWIZZLE_ONE | (R500_SWIZZLE_ONE << 6); - code->inst[counter].inst3 |= MAKE_SWIZ_RGB_B(temp_swiz); - code->inst[counter].inst4 |= R500_ALPHA_OP_MAD - | R500_ALPHA_ADDRD(dest) - | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE) - | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1])); - code->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD - | R500_ALU_RGBA_ADDRD(dest) - | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) - | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); + * So basically MUL with lotsa swizzling. 
*/ + ip = emit_alu(cs, R500_ALU_RGBA_OP_MAD, R500_ALPHA_OP_MAD, fpi->DstReg); + set_src0(cs, ip, fpi->SrcReg[0]); + set_src1(cs, ip, fpi->SrcReg[1]); + set_argA(cs, ip, 0, + (make_rgb_swizzle(fpi->SrcReg[0]) & ~0x7) | R500_SWIZZLE_ONE, + R500_SWIZZLE_ONE); + set_argB(cs, ip, 1, + (make_rgb_swizzle(fpi->SrcReg[1]) & ~0x1c7) | R500_SWIZZLE_ONE | (R500_SWIZZLE_ONE << 6), /* B reads source 1, so its RGB swizzle must come from operand 1; X and Z are forced to one */ + make_alpha_swizzle(fpi->SrcReg[1])); + set_argC(cs, ip, 0, R500_SWIZ_RGB_ZERO, R500_SWIZZLE_ZERO); break; case OPCODE_EX2: src[0] = make_src(cs, fpi->SrcReg[0]); - emit_sop(cs, counter, fpi, OPCODE_EX2, src[0], make_sop_swizzle(fpi->SrcReg[0]), dest); + emit_sop(cs, R500_ALPHA_OP_EX2, fpi->DstReg, src[0], make_sop_swizzle(fpi->SrcReg[0])); break; case OPCODE_FLR: - src[0] = make_src(cs, fpi->SrcReg[0]); - code->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_ARGB << 11); - code->inst[counter].inst1 = R500_RGB_ADDR0(src[0]); - code->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]); - code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])); - code->inst[counter].inst4 |= R500_ALPHA_OP_FRC - | R500_ALPHA_ADDRD(get_temp(cs, 0)) - | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])); - code->inst[counter].inst5 = R500_ALU_RGBA_OP_FRC - | R500_ALU_RGBA_ADDRD(get_temp(cs, 0)); - counter++; - emit_alu(cs, counter, fpi); - code->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) - | R500_RGB_ADDR1(get_temp(cs, 0)); - code->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) - | R500_ALPHA_ADDR1(get_temp(cs, 0)); - code->inst[counter].inst3 = MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE) - | R500_ALU_RGB_SEL_B_SRC0 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[0])); - code->inst[counter].inst4 = R500_ALPHA_OP_MAD - | R500_ALPHA_ADDRD(dest) - | R500_ALPHA_SWIZ_A_A - | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[0])); - code->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD - | R500_ALU_RGBA_ADDRD(dest) - | 
R500_ALU_RGBA_SEL_C_SRC1 - | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[0])) - | R500_ALU_RGBA_ALPHA_SEL_C_SRC1 - | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[0])) - | R500_ALU_RGBA_MOD_C_NEG; + dest = get_temp(cs, 0); + ip = emit_alu_temp(cs, R500_ALU_RGBA_OP_FRC, R500_ALPHA_OP_FRC, dest, WRITEMASK_XYZW); + set_src0(cs, ip, fpi->SrcReg[0]); + set_argA_reg(cs, ip, 0, fpi->SrcReg[0]); + + ip = emit_alu(cs, R500_ALU_RGBA_OP_MAD, R500_ALPHA_OP_MAD, fpi->DstReg); + set_src0(cs, ip, fpi->SrcReg[0]); + set_src1_direct(cs, ip, dest); + set_argA_reg(cs, ip, 0, fpi->SrcReg[1]); + set_argB(cs, ip, 0, R500_SWIZ_RGB_ONE, R500_SWIZZLE_ONE); + set_argC(cs, ip, 1, + R500_SWIZ_RGB_RGB|(R500_SWIZ_MOD_NEG<<9), + SWIZZLE_W|(R500_SWIZ_MOD_NEG<<3)); break; case OPCODE_FRC: - src[0] = make_src(cs, fpi->SrcReg[0]); - emit_alu(cs, counter, fpi); - code->inst[counter].inst1 = R500_RGB_ADDR0(src[0]); - code->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]); - code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])); - code->inst[counter].inst4 |= R500_ALPHA_OP_FRC - | R500_ALPHA_ADDRD(dest) - | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])); - code->inst[counter].inst5 = R500_ALU_RGBA_OP_FRC - | R500_ALU_RGBA_ADDRD(dest); + ip = emit_alu(cs, R500_ALU_RGBA_OP_FRC, R500_ALPHA_OP_FRC, fpi->DstReg); + set_src0(cs, ip, fpi->SrcReg[0]); + set_argA_reg(cs, ip, 0, fpi->SrcReg[0]); break; case OPCODE_LG2: src[0] = make_src(cs, fpi->SrcReg[0]); - emit_sop(cs, counter, fpi, OPCODE_LG2, src[0], make_sop_swizzle(fpi->SrcReg[0]), dest); + emit_sop(cs, R500_ALPHA_OP_LN2, fpi->DstReg, src[0], make_sop_swizzle(fpi->SrcReg[0])); break; case OPCODE_LIT: - src[0] = make_src(cs, fpi->SrcReg[0]); - src[1] = emit_const4fv(cs, LIT); - /* First inst: MAX temp, input, [0, 0, 0, -128] - * Write: RG, A */ - code->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT - | (R500_WRITEMASK_ARG << 11); - 
code->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) | R500_RGB_ADDR1(src[1]); - code->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) | R500_ALPHA_ADDR1(src[1]); - code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])) - | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ZERO); - code->inst[counter].inst4 = R500_ALPHA_OP_MAX - | R500_ALPHA_ADDRD(get_temp(cs, 0)) - | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])) - | R500_ALPHA_SEL_B_SRC1 | R500_ALPHA_SWIZ_B_A; - code->inst[counter].inst5 = R500_ALU_RGBA_OP_MAX - | R500_ALU_RGBA_ADDRD(get_temp(cs, 0)); - counter++; - /* Second inst: MIN temp, temp, [x, x, x, 128] - * Write: A */ - code->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_A << 11); - code->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(cs, 0)) | R500_RGB_ADDR1(src[1]); - code->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(cs, 0)) | R500_ALPHA_ADDR1(src[1]); - /* code->inst[counter].inst3; */ - code->inst[counter].inst4 = R500_ALPHA_OP_MAX - | R500_ALPHA_ADDRD(dest) - | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A - | R500_ALPHA_SEL_B_SRC1 | R500_ALPHA_SWIZ_B_A; - code->inst[counter].inst5 = R500_ALU_RGBA_OP_MAX - | R500_ALU_RGBA_ADDRD(dest); - counter++; - /* Third-fifth insts: POW temp, temp.y, temp.w - * Write: B */ - emit_sop(cs, counter, fpi, OPCODE_LG2, get_temp(cs, 0), SWIZZLE_Y, get_temp(cs, 1)); - code->inst[counter].inst0 |= (R500_WRITEMASK_ARGB << 11); - counter++; - code->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_ARGB << 11); - code->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(cs, 1)) - | R500_RGB_ADDR1(get_temp(cs, 0)); - code->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(cs, 1)) - | R500_ALPHA_ADDR1(get_temp(cs, 0)); - code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB) - | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB); - code->inst[counter].inst4 = R500_ALPHA_OP_MAD - | 
R500_ALPHA_ADDRD(get_temp(cs, 1)) - | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A - | R500_ALPHA_SEL_B_SRC1 | R500_ALPHA_SWIZ_B_A; - code->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD - | R500_ALU_RGBA_ADDRD(get_temp(cs, 1)) - | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) - | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); - counter++; - emit_sop(cs, counter, fpi, OPCODE_EX2, get_temp(cs, 1), SWIZZLE_W, get_temp(cs, 0)); - code->inst[counter].inst0 |= (R500_WRITEMASK_B << 11); - counter++; - /* Sixth inst: CMP dest, temp.xxxx, temp.[1, x, z, 1], temp.[1, x, 0, 1]; - * Write: ARGB - * This inst's selects need to be swapped as follows: - * 0 -> C ; 1 -> B ; 2 -> A */ - emit_alu(cs, counter, fpi); - code->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(cs, 0)); - code->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(cs, 0)); - code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | R500_ALU_RGB_R_SWIZ_A_1 - | R500_ALU_RGB_G_SWIZ_A_R - | R500_ALU_RGB_B_SWIZ_A_B - | R500_ALU_RGB_SEL_B_SRC0 - | R500_ALU_RGB_R_SWIZ_B_1 - | R500_ALU_RGB_G_SWIZ_B_R - | R500_ALU_RGB_B_SWIZ_B_0; - code->inst[counter].inst4 |= R500_ALPHA_OP_CMP - | R500_ALPHA_ADDRD(dest) - | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_1 - | R500_ALPHA_SEL_B_SRC0 | R500_ALPHA_SWIZ_B_1; - code->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP - | R500_ALU_RGBA_ADDRD(dest) - | R500_ALU_RGBA_SEL_C_SRC0 - | R500_ALU_RGBA_ALPHA_SEL_C_SRC0 - | R500_ALU_RGBA_R_SWIZ_R - | R500_ALU_RGBA_G_SWIZ_R - | R500_ALU_RGBA_B_SWIZ_R - | R500_ALU_RGBA_A_SWIZ_R; + emit_lit(cs, fpi); break; case OPCODE_LRP: - /* src0 * src1 + INV(src0) * src2 - * 1) MUL src0, src1, temp - * 2) PRE 1-src0; MAD srcp, src2, temp */ - src[0] = make_src(cs, fpi->SrcReg[0]); - src[1] = make_src(cs, fpi->SrcReg[1]); - src[2] = make_src(cs, fpi->SrcReg[2]); - code->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT - | R500_INST_NOP | (R500_WRITEMASK_ARGB << 11); - code->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) - | R500_RGB_ADDR1(src[1]); - code->inst[counter].inst2 = 
R500_ALPHA_ADDR0(src[0]) - | R500_ALPHA_ADDR1(src[1]); - code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])) - | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1])); - code->inst[counter].inst4 = R500_ALPHA_OP_MAD - | R500_ALPHA_ADDRD(get_temp(cs, 0)) - | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])) - | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1])); - code->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD - | R500_ALU_RGBA_ADDRD(get_temp(cs, 0)) - | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) - | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); - counter++; - emit_alu(cs, counter, fpi); - code->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) - | R500_RGB_ADDR1(src[2]) - | R500_RGB_ADDR2(get_temp(cs, 0)) - | R500_RGB_SRCP_OP_1_MINUS_RGB0; - code->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) - | R500_ALPHA_ADDR1(src[2]) - | R500_ALPHA_ADDR2(get_temp(cs, 0)) - | R500_ALPHA_SRCP_OP_1_MINUS_A0; - code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRCP - | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])) - | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[2])); - code->inst[counter].inst4 |= R500_ALPHA_OP_MAD - | R500_ALPHA_ADDRD(dest) - | R500_ALPHA_SEL_A_SRCP | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])) - | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[2])); - code->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD - | R500_ALU_RGBA_ADDRD(dest) - | R500_ALU_RGBA_SEL_C_SRC2 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_RGB) - | R500_ALU_RGBA_ALPHA_SEL_C_SRC2 | R500_ALU_RGBA_A_SWIZ_A; + /* result = src0*src1 + (1-src0)*src2 + * = src0*src1 + src2 + (-src0)*src2 + * + * Note: LRP without swizzling (or with only limited + * swizzling) could be done more efficiently using the + * presubtract hardware. 
+ */ + dest = get_temp(cs, 0); + ip = emit_alu_temp(cs, R500_ALU_RGBA_OP_MAD, R500_ALPHA_OP_MAD, dest, WRITEMASK_XYZW); + set_src0(cs, ip, fpi->SrcReg[0]); + set_src1(cs, ip, fpi->SrcReg[1]); + set_src2(cs, ip, fpi->SrcReg[2]); + set_argA_reg(cs, ip, 0, fpi->SrcReg[0]); + set_argB_reg(cs, ip, 1, fpi->SrcReg[1]); + set_argC_reg(cs, ip, 2, fpi->SrcReg[2]); + + ip = emit_alu(cs, R500_ALU_RGBA_OP_MAD, R500_ALPHA_OP_MAD, fpi->DstReg); + set_src0(cs, ip, fpi->SrcReg[0]); + set_src1(cs, ip, fpi->SrcReg[2]); + set_src2_direct(cs, ip, dest); + set_argA(cs, ip, 0, + make_rgb_swizzle(fpi->SrcReg[0]) ^ (R500_SWIZ_MOD_NEG<<9), + make_alpha_swizzle(fpi->SrcReg[0]) ^ (R500_SWIZ_MOD_NEG<<3)); + set_argB_reg(cs, ip, 1, fpi->SrcReg[2]); + set_argC(cs, ip, 2, R500_SWIZ_RGB_RGB, SWIZZLE_W); break; case OPCODE_MAD: - emit_mad(cs, counter, fpi, 0, 1, 2); + ip = emit_alu(cs, R500_ALU_RGBA_OP_MAD, R500_ALPHA_OP_MAD, fpi->DstReg); + set_src0(cs, ip, fpi->SrcReg[0]); + set_src1(cs, ip, fpi->SrcReg[1]); + set_src2(cs, ip, fpi->SrcReg[2]); + set_argA_reg(cs, ip, 0, fpi->SrcReg[0]); + set_argB_reg(cs, ip, 1, fpi->SrcReg[1]); + set_argC_reg(cs, ip, 2, fpi->SrcReg[2]); break; case OPCODE_MAX: - src[0] = make_src(cs, fpi->SrcReg[0]); - src[1] = make_src(cs, fpi->SrcReg[1]); - emit_alu(cs, counter, fpi); - code->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) | R500_RGB_ADDR1(src[1]); - code->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) | R500_ALPHA_ADDR1(src[1]); - code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])) - | R500_ALU_RGB_SEL_B_SRC1 - | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1])); - code->inst[counter].inst4 |= R500_ALPHA_OP_MAX - | R500_ALPHA_ADDRD(dest) - | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])) - | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1])); - code->inst[counter].inst5 = R500_ALU_RGBA_OP_MAX - | R500_ALU_RGBA_ADDRD(dest); + ip = emit_alu(cs, 
R500_ALU_RGBA_OP_MAX, R500_ALPHA_OP_MAX, fpi->DstReg); + set_src0(cs, ip, fpi->SrcReg[0]); + set_src1(cs, ip, fpi->SrcReg[1]); + set_argA_reg(cs, ip, 0, fpi->SrcReg[0]); + set_argB_reg(cs, ip, 1, fpi->SrcReg[1]); break; case OPCODE_MIN: - src[0] = make_src(cs, fpi->SrcReg[0]); - src[1] = make_src(cs, fpi->SrcReg[1]); - emit_alu(cs, counter, fpi); - code->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) | R500_RGB_ADDR1(src[1]); - code->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) | R500_ALPHA_ADDR1(src[1]); - code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])) - | R500_ALU_RGB_SEL_B_SRC1 - | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1])); - code->inst[counter].inst4 |= R500_ALPHA_OP_MIN - | R500_ALPHA_ADDRD(dest) - | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])) - | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1])); - code->inst[counter].inst5 = R500_ALU_RGBA_OP_MIN - | R500_ALU_RGBA_ADDRD(dest); + ip = emit_alu(cs, R500_ALU_RGBA_OP_MIN, R500_ALPHA_OP_MIN, fpi->DstReg); + set_src0(cs, ip, fpi->SrcReg[0]); + set_src1(cs, ip, fpi->SrcReg[1]); + set_argA_reg(cs, ip, 0, fpi->SrcReg[0]); + set_argB_reg(cs, ip, 1, fpi->SrcReg[1]); break; case OPCODE_MOV: - emit_mov(cs, counter, fpi, make_src(cs, fpi->SrcReg[0]), fpi->SrcReg[0].Swizzle, dest); + ip = emit_alu(cs, R500_ALU_RGBA_OP_CMP, R500_ALPHA_OP_CMP, fpi->DstReg); + set_src0(cs, ip, fpi->SrcReg[0]); + set_argA_reg(cs, ip, 0, fpi->SrcReg[0]); + set_argB_reg(cs, ip, 0, fpi->SrcReg[0]); + set_argC(cs, ip, 0, R500_SWIZ_RGB_ZERO, R500_SWIZZLE_ZERO); + code->inst[ip].inst3 |= R500_ALU_RGB_OMOD_DISABLE; + code->inst[ip].inst4 |= R500_ALPHA_OMOD_DISABLE; break; case OPCODE_MUL: /* Variation on MAD: src0*src1+0 */ - emit_mad(cs, counter, fpi, 0, 1, R500_SWIZZLE_ZERO); - break; - case OPCODE_POW: - /* POW(a,b) = EX2(LN2(a)*b) */ - src[0] = make_src(cs, fpi->SrcReg[0]); - src[1] = make_src(cs, fpi->SrcReg[1]); - 
emit_sop(cs, counter, fpi, OPCODE_LG2, src[0], make_sop_swizzle(fpi->SrcReg[0]), get_temp(cs, 0)); - code->inst[counter].inst0 |= (R500_WRITEMASK_ARGB << 11); - counter++; - code->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_ARGB << 11); - code->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(cs, 0)) - | R500_RGB_ADDR1(src[1]); - code->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(cs, 0)) - | R500_ALPHA_ADDR1(src[1]); - code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])) - | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1])); - code->inst[counter].inst4 = R500_ALPHA_OP_MAD - | R500_ALPHA_ADDRD(get_temp(cs, 1)) - | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])) - | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1])); - code->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD - | R500_ALU_RGBA_ADDRD(get_temp(cs, 1)) - | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) - | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); - counter++; - emit_sop(cs, counter, fpi, OPCODE_EX2, get_temp(cs, 1), make_sop_swizzle(fpi->SrcReg[0]), dest); + ip = emit_alu(cs, R500_ALU_RGBA_OP_MAD, R500_ALPHA_OP_MAD, fpi->DstReg); + set_src0(cs, ip, fpi->SrcReg[0]); + set_src1(cs, ip, fpi->SrcReg[1]); + set_argA_reg(cs, ip, 0, fpi->SrcReg[0]); + set_argB_reg(cs, ip, 1, fpi->SrcReg[1]); + set_argC(cs, ip, 0, R500_SWIZ_RGB_ZERO, R500_SWIZZLE_ZERO); break; case OPCODE_RCP: src[0] = make_src(cs, fpi->SrcReg[0]); - emit_sop(cs, counter, fpi, OPCODE_RCP, src[0], make_sop_swizzle(fpi->SrcReg[0]), dest); + emit_sop(cs, R500_ALPHA_OP_RCP, fpi->DstReg, src[0], make_sop_swizzle(fpi->SrcReg[0])); break; case OPCODE_RSQ: src[0] = make_src(cs, fpi->SrcReg[0]); - emit_sop(cs, counter, fpi, OPCODE_RSQ, src[0], make_sop_swizzle(fpi->SrcReg[0]), dest); + emit_sop(cs, R500_ALPHA_OP_RSQ, fpi->DstReg, src[0], make_sop_swizzle(fpi->SrcReg[0])); break; case OPCODE_SCS: - src[0] = make_src(cs, 
fpi->SrcReg[0]); - src[1] = emit_const4fv(cs, RCP_2PI); - code->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT - | (R500_WRITEMASK_ARGB << 11); - code->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) - | R500_RGB_ADDR1(src[1]); - code->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) - | R500_ALPHA_ADDR1(src[1]); - code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB) - | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB); - code->inst[counter].inst4 = R500_ALPHA_OP_MAD - | R500_ALPHA_ADDRD(get_temp(cs, 0)) - | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A - | R500_ALPHA_SEL_B_SRC1 | R500_ALPHA_SWIZ_B_A; - code->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD - | R500_ALU_RGBA_ADDRD(get_temp(cs, 0)) - | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) - | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); - counter++; - code->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_ARGB << 11); - code->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(cs, 0)); - code->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(cs, 0)); - code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB); - code->inst[counter].inst4 = R500_ALPHA_OP_FRC - | R500_ALPHA_ADDRD(get_temp(cs, 1)) - | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A; - code->inst[counter].inst5 = R500_ALU_RGBA_OP_FRC - | R500_ALU_RGBA_ADDRD(get_temp(cs, 1)); - counter++; - /* Do a cosine, then a sine, masking out the channels we want to protect. */ - /* Cosine only goes in R (x) channel. */ - fpi->DstReg.WriteMask = 0x1; - emit_sop(cs, counter, fpi, OPCODE_COS, get_temp(cs, 1), make_sop_swizzle(fpi->SrcReg[0]), dest); - counter++; - /* Sine only goes in G (y) channel. 
*/ - fpi->DstReg.WriteMask = 0x2; - emit_sop(cs, counter, fpi, OPCODE_SIN, get_temp(cs, 1), make_sop_swizzle(fpi->SrcReg[0]), dest); - break; - case OPCODE_SGE: - src[0] = make_src(cs, fpi->SrcReg[0]); - src[1] = make_src(cs, fpi->SrcReg[1]); - code->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT - | (R500_WRITEMASK_ARGB << 11); - code->inst[counter].inst1 = R500_RGB_ADDR1(src[0]) - | R500_RGB_ADDR2(src[1]); - code->inst[counter].inst2 = R500_ALPHA_ADDR1(src[0]) - | R500_ALPHA_ADDR2(src[1]); - code->inst[counter].inst3 = /* 1 */ - MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE) - | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[0])); - code->inst[counter].inst4 = R500_ALPHA_OP_MAD - | R500_ALPHA_ADDRD(get_temp(cs, 0)) - | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE) - | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[0])); - code->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD - | R500_ALU_RGBA_ADDRD(get_temp(cs, 0)) - | R500_ALU_RGBA_SEL_C_SRC2 - | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[1])) - | R500_ALU_RGBA_MOD_C_NEG - | R500_ALU_RGBA_ALPHA_SEL_C_SRC2 - | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[1])) - | R500_ALU_RGBA_ALPHA_MOD_C_NEG; - counter++; - /* This inst's selects need to be swapped as follows: - * 0 -> C ; 1 -> B ; 2 -> A */ - emit_alu(cs, counter, fpi); - code->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(cs, 0)); - code->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(cs, 0)); - code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE) - | R500_ALU_RGB_SEL_B_SRC0 - | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ZERO); - code->inst[counter].inst4 |= R500_ALPHA_OP_CMP - | R500_ALPHA_ADDRD(dest) - | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE) - | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ZERO); - code->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP - | R500_ALU_RGBA_ADDRD(dest) - | R500_ALU_RGBA_SEL_C_SRC0 - | 
MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_RGB) - | R500_ALU_RGBA_ALPHA_SEL_C_SRC0 - | R500_ALU_RGBA_A_SWIZ_A; + emit_trig(cs, fpi); break; case OPCODE_SIN: - src[0] = make_src(cs, fpi->SrcReg[0]); - src[1] = emit_const4fv(cs, RCP_2PI); - code->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT - | (R500_WRITEMASK_ARGB << 11); - code->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) - | R500_RGB_ADDR1(src[1]); - code->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) - | R500_ALPHA_ADDR1(src[1]); - code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB) - | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB); - code->inst[counter].inst4 = R500_ALPHA_OP_MAD - | R500_ALPHA_ADDRD(get_temp(cs, 0)) - | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A - | R500_ALPHA_SEL_B_SRC1 | R500_ALPHA_SWIZ_B_A; - code->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD - | R500_ALU_RGBA_ADDRD(get_temp(cs, 0)) - | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) - | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); - counter++; - code->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_ARGB << 11); - code->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(cs, 0)); - code->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(cs, 0)); - code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB); - code->inst[counter].inst4 = R500_ALPHA_OP_FRC - | R500_ALPHA_ADDRD(get_temp(cs, 1)) - | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A; - code->inst[counter].inst5 = R500_ALU_RGBA_OP_FRC - | R500_ALU_RGBA_ADDRD(get_temp(cs, 1)); - counter++; - emit_sop(cs, counter, fpi, OPCODE_SIN, get_temp(cs, 1), make_sop_swizzle(fpi->SrcReg[0]), dest); - break; - case OPCODE_SLT: - src[0] = make_src(cs, fpi->SrcReg[0]); - src[1] = make_src(cs, fpi->SrcReg[1]); - code->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT - | (R500_WRITEMASK_ARGB << 11); - code->inst[counter].inst1 = R500_RGB_ADDR1(src[0]) - | R500_RGB_ADDR2(src[1]); - code->inst[counter].inst2 = 
R500_ALPHA_ADDR1(src[0]) - | R500_ALPHA_ADDR2(src[1]); - code->inst[counter].inst3 = /* 1 */ - MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE) - | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[0])); - code->inst[counter].inst4 = R500_ALPHA_OP_MAD - | R500_ALPHA_ADDRD(get_temp(cs, 0)) - | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE) - | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[0])); - code->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD - | R500_ALU_RGBA_ADDRD(get_temp(cs, 0)) - | R500_ALU_RGBA_SEL_C_SRC2 - | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[1])) - | R500_ALU_RGBA_MOD_C_NEG - | R500_ALU_RGBA_ALPHA_SEL_C_SRC2 - | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[1])) - | R500_ALU_RGBA_ALPHA_MOD_C_NEG; - counter++; - /* This inst's selects need to be swapped as follows: - * 0 -> C ; 1 -> B ; 2 -> A */ - emit_alu(cs, counter, fpi); - code->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(cs, 0)); - code->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(cs, 0)); - code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ZERO) - | R500_ALU_RGB_SEL_B_SRC0 - | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ONE); - code->inst[counter].inst4 |= R500_ALPHA_OP_CMP - | R500_ALPHA_ADDRD(dest) - | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ZERO) - | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ONE); - code->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP - | R500_ALU_RGBA_ADDRD(dest) - | R500_ALU_RGBA_SEL_C_SRC0 - | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_RGB) - | R500_ALU_RGBA_ALPHA_SEL_C_SRC0 - | R500_ALU_RGBA_A_SWIZ_A; - break; - case OPCODE_SUB: - /* Variation on MAD: 1*src0-src1 */ - fpi->SrcReg[1].NegateBase = 0xF; /* NEG_XYZW */ - emit_mad(cs, counter, fpi, R500_SWIZZLE_ONE, 0, 1); - break; - case OPCODE_SWZ: - /* TODO: The rarer negation masks! 
*/ - emit_mov(cs, counter, fpi, make_src(cs, fpi->SrcReg[0]), fpi->SrcReg[0].Swizzle, dest); - break; - case OPCODE_XPD: - /* src0 * src1 - src1 * src0 - * 1) MUL temp.xyz, src0.yzx, src1.zxy - * 2) MAD src0.zxy, src1.yzx, -temp.xyz */ - src[0] = make_src(cs, fpi->SrcReg[0]); - src[1] = make_src(cs, fpi->SrcReg[1]); - code->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT - | (R500_WRITEMASK_RGB << 11); - code->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) - | R500_RGB_ADDR1(src[1]); - code->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) - | R500_ALPHA_ADDR1(src[1]); - /* Select [y, z, x] */ - temp_swiz = make_rgb_swizzle(fpi->SrcReg[0]); - temp_swiz = (GET_SWZ(temp_swiz, 1) << 0) | (GET_SWZ(temp_swiz, 2) << 3) | (GET_SWZ(temp_swiz, 0) << 6); - code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | MAKE_SWIZ_RGB_A(temp_swiz); - /* Select [z, x, y] */ - temp_swiz = make_rgb_swizzle(fpi->SrcReg[1]); - temp_swiz = (GET_SWZ(temp_swiz, 2) << 0) | (GET_SWZ(temp_swiz, 0) << 3) | (GET_SWZ(temp_swiz, 1) << 6); - code->inst[counter].inst3 |= R500_ALU_RGB_SEL_B_SRC1 - | MAKE_SWIZ_RGB_B(temp_swiz); - code->inst[counter].inst4 = R500_ALPHA_OP_MAD - | R500_ALPHA_ADDRD(get_temp(cs, 0)) - | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])) - | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1])); - code->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD - | R500_ALU_RGBA_ADDRD(get_temp(cs, 0)) - | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) - | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); - counter++; - emit_alu(cs, counter, fpi); - code->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) - | R500_RGB_ADDR1(src[1]) - | R500_RGB_ADDR2(get_temp(cs, 0)); - code->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) - | R500_ALPHA_ADDR1(src[1]) - | R500_ALPHA_ADDR2(get_temp(cs, 0)); - /* Select [z, x, y] */ - temp_swiz = make_rgb_swizzle(fpi->SrcReg[0]); - temp_swiz = (GET_SWZ(temp_swiz, 2) << 0) | (GET_SWZ(temp_swiz, 0) << 3) | (GET_SWZ(temp_swiz, 
1) << 6); - code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | MAKE_SWIZ_RGB_A(temp_swiz); - /* Select [y, z, x] */ - temp_swiz = make_rgb_swizzle(fpi->SrcReg[1]); - temp_swiz = (GET_SWZ(temp_swiz, 1) << 0) | (GET_SWZ(temp_swiz, 2) << 3) | (GET_SWZ(temp_swiz, 0) << 6); - code->inst[counter].inst3 |= R500_ALU_RGB_SEL_B_SRC1 - | MAKE_SWIZ_RGB_B(temp_swiz); - code->inst[counter].inst4 |= R500_ALPHA_OP_MAD - | R500_ALPHA_ADDRD(dest) - | R500_ALPHA_SWIZ_A_1 - | R500_ALPHA_SWIZ_B_1; - code->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD - | R500_ALU_RGBA_ADDRD(dest) - | R500_ALU_RGBA_SEL_C_SRC2 - | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_RGB) - | R500_ALU_RGBA_MOD_C_NEG - | R500_ALU_RGBA_A_SWIZ_0; + emit_trig(cs, fpi); break; case OPCODE_KIL: case OPCODE_TEX: case OPCODE_TXB: case OPCODE_TXP: - emit_tex(cs, fpi, dest, counter); - if (fpi->DstReg.File == PROGRAM_OUTPUT) - counter++; + emit_tex(cs, fpi, dest); break; default: ERROR("unknown fpi->Opcode %s\n", _mesa_opcode_string(fpi->Opcode)); @@ -1245,37 +928,30 @@ static int do_inst(struct r500_pfs_compile_state *cs, struct prog_instruction *f /* Finishing touches */ if (fpi->SaturateMode == SATURATE_ZERO_ONE) { - code->inst[counter].inst0 |= R500_INST_RGB_CLAMP | R500_INST_ALPHA_CLAMP; + code->inst[cs->nrslots-1].inst0 |= R500_INST_RGB_CLAMP | R500_INST_ALPHA_CLAMP; } - - counter++; - - return counter; } static GLboolean parse_program(struct r500_pfs_compile_state *cs) { PROG_CODE; - int counter = 0; struct prog_instruction* fpi; for(fpi = cs->compiler->program->Instructions; fpi->Opcode != OPCODE_END; ++fpi) { - counter = do_inst(cs, fpi, counter); + do_inst(cs, fpi); if (cs->compiler->fp->error) return GL_FALSE; } /* Finish him! (If it's an ALU/OUT instruction...) */ - if ((code->inst[counter-1].inst0 & 0x3) == 1) { - code->inst[counter-1].inst0 |= R500_INST_LAST; + if ((code->inst[cs->nrslots-1].inst0 & 0x3) == 1) { + code->inst[cs->nrslots-1].inst0 |= R500_INST_LAST; } else { /* We still need to put an output inst, right? 
*/ WARN_ONCE("Final FP instruction is not an OUT.\n"); } - cs->nrslots = counter; - code->max_temp_idx++; return GL_TRUE; @@ -1295,12 +971,10 @@ static void init_program(struct r500_pfs_compile_state *cs) driQueryOptioni(&cs->compiler->r300->radeon.optionCache, "fp_optimization"); cs->compiler->fp->translated = GL_FALSE; cs->compiler->fp->error = GL_FALSE; - code->const_nr = 0; - /* Size of pixel stack, plus 1. */ - code->max_temp_idx = 1; - /* Temp register offset. */ - code->temp_reg_offset = 0; - /* Whether or not we perform any depth writing. */ + + _mesa_bzero(code, sizeof(*code)); + code->max_temp_idx = 1; /* Size of pixel stack, plus 1. */ + cs->nrslots = 0; cs->compiler->fp->writes_depth = GL_FALSE; /* Work out what temps the Mesa inputs correspond to, this must match -- 2.30.2