From: Nicolai Haehnle Date: Sat, 14 Jun 2008 21:09:15 +0000 (+0200) Subject: r300: Add radeonTransformALU and fix a bug in r300_fragprog DPH X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=43da1189610fcaa5ade69620734a7b1e5caf84c4;p=mesa.git r300: Add radeonTransformALU and fix a bug in r300_fragprog DPH This new generic transform replaces "special" instructions by more generic variants. Hopefully, we will be able to share this code between r300 and r500. --- diff --git a/src/mesa/drivers/dri/r300/Makefile b/src/mesa/drivers/dri/r300/Makefile index 7b8f5f1384d..d52b2b4c36d 100644 --- a/src/mesa/drivers/dri/r300/Makefile +++ b/src/mesa/drivers/dri/r300/Makefile @@ -37,6 +37,7 @@ DRIVER_SOURCES = \ r300_tex.c \ r300_texstate.c \ radeon_program.c \ + radeon_program_alu.c \ r300_vertprog.c \ r300_fragprog.c \ r300_fragprog_emit.c \ diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.c b/src/mesa/drivers/dri/r300/r300_fragprog.c index f8cc3ae7728..6d24d266fec 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog.c @@ -49,6 +49,8 @@ #include "r300_fragprog.h" #include "r300_state.h" +#include "radeon_program_alu.h" + static void reset_srcreg(struct prog_src_register* reg) { @@ -396,12 +398,13 @@ void r300TranslateFragmentShader(r300ContextPtr r300, insert_WPOS_trailer(&compiler); - struct radeon_program_transformation transformations[1] = { - { &transform_TEX, &compiler } + struct radeon_program_transformation transformations[] = { + { &transform_TEX, &compiler }, + { &radeonTransformALU, 0 } }; radeonClauseLocalTransform(&compiler.compiler, &compiler.compiler.Clauses[0], - 1, transformations); + 2, transformations); if (RADEON_DEBUG & DEBUG_PIXEL) { _mesa_printf("Compiler state after transformations:\n"); diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_emit.c b/src/mesa/drivers/dri/r300/r300_fragprog_emit.c index aec202a129b..9ba29feb40b 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog_emit.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog_emit.c @@ -838,6 +838,10 @@ static GLuint t_src(struct r300_pfs_compile_state *cs, /* no point swizzling ONE/ZERO/HALF constants... */ if (REG_GET_VSWZ(r) < SWIZZLE_111 || REG_GET_SSWZ(r) < SWIZZLE_ZERO) r = do_swizzle(cs, r, fpsrc.Swizzle, fpsrc.NegateBase); + if (fpsrc.Abs) + r = absolute(r); + if (fpsrc.NegateAbs) + r = negate(r); return r; } @@ -1309,7 +1313,7 @@ static int find_and_prepare_slot(struct r300_pfs_compile_state *cs, swz[i] = (s_swiz[REG_GET_SSWZ(src[i])].base + (srcpos[i] * s_swiz[REG_GET_SSWZ(src[i])]. - stride)) | ((src[i] & REG_NEGV_MASK) + stride)) | ((src[i] & REG_NEGS_MASK) ? ARG_NEG : 0) | ((src[i] & REG_ABS_MASK) @@ -1562,11 +1566,6 @@ static void emit_instruction(struct r300_pfs_compile_state *cs, struct prog_inst } switch (fpi->Opcode) { - case OPCODE_ABS: - src[0] = t_src(cs, fpi->SrcReg[0]); - emit_arith(cs, PFS_OP_MAD, dest, mask, - absolute(src[0]), pfs_one, pfs_zero, flags); - break; case OPCODE_ADD: src[0] = t_src(cs, fpi->SrcReg[0]); src[1] = t_src(cs, fpi->SrcReg[1]); @@ -1649,16 +1648,6 @@ static void emit_instruction(struct r300_pfs_compile_state *cs, struct prog_inst emit_arith(cs, PFS_OP_DP4, dest, mask, src[0], src[1], undef, flags); break; - case OPCODE_DPH: - src[0] = t_src(cs, fpi->SrcReg[0]); - src[1] = t_src(cs, fpi->SrcReg[1]); - /* src0.xyz1 -> temp - * DP4 dest, temp, src1 - */ - emit_arith(cs, PFS_OP_DP4, dest, mask, - swizzle(src[0], X, Y, Z, ONE), src[1], - undef, flags); - break; case OPCODE_DST: src[0] = t_src(cs, fpi->SrcReg[0]); src[1] = t_src(cs, fpi->SrcReg[1]); @@ -1685,18 +1674,6 @@ static void emit_instruction(struct r300_pfs_compile_state *cs, struct prog_inst emit_arith(cs, PFS_OP_EX2, dest, mask, src[0], undef, undef, flags); break; - case OPCODE_FLR: - src[0] = t_src(cs, fpi->SrcReg[0]); - temp[0] = get_temp_reg(cs); - /* FRC temp, src0 - * MAD dest, src0, 1.0, -temp - */ - emit_arith(cs, PFS_OP_FRC, temp[0], mask, - keep(src[0]), undef, undef, 0); - emit_arith(cs, PFS_OP_MAD, dest, mask, - src[0], pfs_one, negate(temp[0]), flags); - free_temp(cs, temp[0]); - break; case OPCODE_FRC: src[0] = t_src(cs, fpi->SrcReg[0]); emit_arith(cs, PFS_OP_FRC, dest, mask, @@ -1751,7 +1728,6 @@ static void emit_instruction(struct r300_pfs_compile_state *cs, struct prog_inst src[0], src[1], undef, flags); break; case OPCODE_MOV: - case OPCODE_SWZ: src[0] = t_src(cs, fpi->SrcReg[0]); emit_arith(cs, PFS_OP_MAD, dest, mask, src[0], pfs_one, pfs_zero, flags); @@ -1762,18 +1738,6 @@ static void emit_instruction(struct r300_pfs_compile_state *cs, struct prog_inst emit_arith(cs, PFS_OP_MAD, dest, mask, src[0], src[1], pfs_zero, flags); break; - case OPCODE_POW: - src[0] = t_scalar_src(cs, fpi->SrcReg[0]); - src[1] = t_scalar_src(cs, fpi->SrcReg[1]); - temp[0] = get_temp_reg(cs); - emit_arith(cs, PFS_OP_LG2, temp[0], WRITEMASK_W, - src[0], undef, undef, 0); - emit_arith(cs, PFS_OP_MAD, temp[0], WRITEMASK_W, - temp[0], src[1], pfs_zero, 0); - emit_arith(cs, PFS_OP_EX2, dest, fpi->DstReg.WriteMask, - temp[0], undef, undef, 0); - free_temp(cs, temp[0]); - break; case OPCODE_RCP: src[0] = t_scalar_src(cs, fpi->SrcReg[0]); emit_arith(cs, PFS_OP_RCP, dest, mask, @@ -1852,19 +1816,6 @@ static void emit_instruction(struct r300_pfs_compile_state *cs, struct prog_inst free_temp(cs, temp[0]); free_temp(cs, temp[1]); break; - case OPCODE_SGE: - src[0] = t_src(cs, fpi->SrcReg[0]); - src[1] = t_src(cs, fpi->SrcReg[1]); - temp[0] = get_temp_reg(cs); - /* temp = src0 - src1 - * dest.c = (temp.c < 0.0) ? 0 : 1 - */ - emit_arith(cs, PFS_OP_MAD, temp[0], mask, - src[0], pfs_one, negate(src[1]), 0); - emit_arith(cs, PFS_OP_CMP, dest, mask, - pfs_one, pfs_zero, temp[0], 0); - free_temp(cs, temp[0]); - break; case OPCODE_SIN: /* * using a parabola: @@ -1918,25 +1869,6 @@ static void emit_instruction(struct r300_pfs_compile_state *cs, struct prog_inst free_temp(cs, temp[0]); break; - case OPCODE_SLT: - src[0] = t_src(cs, fpi->SrcReg[0]); - src[1] = t_src(cs, fpi->SrcReg[1]); - temp[0] = get_temp_reg(cs); - /* temp = src0 - src1 - * dest.c = (temp.c < 0.0) ? 1 : 0 - */ - emit_arith(cs, PFS_OP_MAD, temp[0], mask, - src[0], pfs_one, negate(src[1]), 0); - emit_arith(cs, PFS_OP_CMP, dest, mask, - pfs_zero, pfs_one, temp[0], 0); - free_temp(cs, temp[0]); - break; - case OPCODE_SUB: - src[0] = t_src(cs, fpi->SrcReg[0]); - src[1] = t_src(cs, fpi->SrcReg[1]); - emit_arith(cs, PFS_OP_MAD, dest, mask, - src[0], pfs_one, negate(src[1]), flags); - break; case OPCODE_TEX: emit_tex(cs, fpi, R300_TEX_OP_LD); break; @@ -1946,29 +1878,6 @@ static void emit_instruction(struct r300_pfs_compile_state *cs, struct prog_inst case OPCODE_TXP: emit_tex(cs, fpi, R300_TEX_OP_TXP); break; - case OPCODE_XPD:{ - src[0] = t_src(cs, fpi->SrcReg[0]); - src[1] = t_src(cs, fpi->SrcReg[1]); - temp[0] = get_temp_reg(cs); - /* temp = src0.zxy * src1.yzx */ - emit_arith(cs, PFS_OP_MAD, temp[0], - WRITEMASK_XYZ, swizzle(keep(src[0]), - Z, X, Y, W), - swizzle(keep(src[1]), Y, Z, X, W), - pfs_zero, 0); - /* dest.xyz = src0.yzx * src1.zxy - temp - * dest.w = undefined - * */ - emit_arith(cs, PFS_OP_MAD, dest, - mask & WRITEMASK_XYZ, swizzle(src[0], - Y, Z, - X, W), - swizzle(src[1], Z, X, Y, W), - negate(temp[0]), flags); - /* cleanup */ - free_temp(cs, temp[0]); - break; - } default: ERROR("unknown fpi->Opcode %d\n", fpi->Opcode); break; diff --git a/src/mesa/drivers/dri/r300/radeon_program_alu.c b/src/mesa/drivers/dri/r300/radeon_program_alu.c new file mode 100644 index 00000000000..7fe940a7d7f --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_program_alu.c @@ -0,0 +1,284 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/** + * @file + * + * Shareable transformations that transform "special" ALU instructions + * into ALU instructions that are supported by hardware. + * + */ + +#include "radeon_program_alu.h" + + +static struct prog_instruction *emit1(struct radeon_program_transform_context* ctx, + gl_inst_opcode Opcode, struct prog_dst_register DstReg, + struct prog_src_register SrcReg) +{ + struct prog_instruction *fpi = + radeonClauseInsertInstructions(ctx->compiler, ctx->dest, + ctx->dest->NumInstructions, 1); + + fpi->Opcode = Opcode; + fpi->DstReg = DstReg; + fpi->SrcReg[0] = SrcReg; + return fpi; +} + +static struct prog_instruction *emit2(struct radeon_program_transform_context* ctx, + gl_inst_opcode Opcode, struct prog_dst_register DstReg, + struct prog_src_register SrcReg0, struct prog_src_register SrcReg1) +{ + struct prog_instruction *fpi = + radeonClauseInsertInstructions(ctx->compiler, ctx->dest, + ctx->dest->NumInstructions, 1); + + fpi->Opcode = Opcode; + fpi->DstReg = DstReg; + fpi->SrcReg[0] = SrcReg0; + fpi->SrcReg[1] = SrcReg1; + return fpi; +} + +static struct prog_instruction *emit3(struct radeon_program_transform_context* ctx, + gl_inst_opcode Opcode, struct prog_dst_register DstReg, + struct prog_src_register SrcReg0, struct prog_src_register SrcReg1, + struct prog_src_register SrcReg2) +{ + struct prog_instruction *fpi = + radeonClauseInsertInstructions(ctx->compiler, ctx->dest, + ctx->dest->NumInstructions, 1); + + fpi->Opcode = Opcode; + fpi->DstReg = DstReg; + fpi->SrcReg[0] = SrcReg0; + fpi->SrcReg[1] = SrcReg1; + fpi->SrcReg[2] = SrcReg2; + return fpi; +} + +static void set_swizzle(struct prog_src_register *SrcReg, int coordinate, int swz) +{ + SrcReg->Swizzle &= ~(7 << (3*coordinate)); + SrcReg->Swizzle |= swz << (3*coordinate); +} + +static void set_negate_base(struct prog_src_register *SrcReg, int coordinate, int negate) +{ + SrcReg->NegateBase &= ~(1 << coordinate); + SrcReg->NegateBase |= (negate << coordinate); +} + +static struct prog_dst_register dstreg(int file, int index) +{ + struct prog_dst_register dst; + dst.File = file; + dst.Index = index; + dst.WriteMask = WRITEMASK_XYZW; + dst.CondMask = COND_TR; + dst.CondSwizzle = SWIZZLE_NOOP; + dst.CondSrc = 0; + dst.pad = 0; + return dst; +} + +static const struct prog_src_register builtin_zero = { + .File = PROGRAM_BUILTIN, + .Index = 0, + .Swizzle = SWIZZLE_0000 +}; +static const struct prog_src_register builtin_one = { + .File = PROGRAM_BUILTIN, + .Index = 0, + .Swizzle = SWIZZLE_1111 +}; +static const struct prog_src_register srcreg_undefined = { + .File = PROGRAM_UNDEFINED, + .Index = 0, + .Swizzle = SWIZZLE_NOOP +}; + +static struct prog_src_register srcreg(int file, int index) +{ + struct prog_src_register src = srcreg_undefined; + src.File = file; + src.Index = index; + return src; +} + +static struct prog_src_register negate(struct prog_src_register reg) +{ + struct prog_src_register newreg = reg; + newreg.NegateAbs = !newreg.NegateAbs; + return newreg; +} + +static struct prog_src_register swizzle(struct prog_src_register reg, GLuint x, GLuint y, GLuint z, GLuint w) +{ + struct prog_src_register swizzled = reg; + swizzled.Swizzle = MAKE_SWIZZLE4( + GET_SWZ(reg.Swizzle, x), + GET_SWZ(reg.Swizzle, y), + GET_SWZ(reg.Swizzle, z), + GET_SWZ(reg.Swizzle, w)); + return swizzled; +} + +static struct prog_src_register scalar(struct prog_src_register reg) +{ + return swizzle(reg, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X); +} + +static void transform_ABS(struct radeon_program_transform_context* ctx, + struct prog_instruction* inst) +{ + struct prog_src_register src = inst->SrcReg[0]; + src.Abs = 1; + src.NegateBase = 0; + src.NegateAbs = 0; + emit1(ctx, OPCODE_MOV, inst->DstReg, src); +} + +static void transform_DPH(struct radeon_program_transform_context* ctx, + struct prog_instruction* inst) +{ + struct prog_src_register src0 = inst->SrcReg[0]; + if (src0.NegateAbs) { + if (src0.Abs) { + int tempreg = radeonCompilerAllocateTemporary(ctx->compiler); + emit1(ctx, OPCODE_MOV, dstreg(PROGRAM_TEMPORARY, tempreg), src0); + src0 = srcreg(src0.File, src0.Index); + } else { + src0.NegateAbs = 0; + src0.NegateBase ^= NEGATE_XYZW; + } + } + set_swizzle(&src0, 3, SWIZZLE_ONE); + set_negate_base(&src0, 3, 0); + emit2(ctx, OPCODE_DP4, inst->DstReg, src0, inst->SrcReg[1]); +} + +static void transform_FLR(struct radeon_program_transform_context* ctx, + struct prog_instruction* inst) +{ + int tempreg = radeonCompilerAllocateTemporary(ctx->compiler); + emit1(ctx, OPCODE_FRC, dstreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[0]); + emit2(ctx, OPCODE_ADD, inst->DstReg, inst->SrcReg[0], negate(srcreg(PROGRAM_TEMPORARY, tempreg))); +} + +static void transform_POW(struct radeon_program_transform_context* ctx, + struct prog_instruction* inst) +{ + int tempreg = radeonCompilerAllocateTemporary(ctx->compiler); + struct prog_dst_register tempdst = dstreg(PROGRAM_TEMPORARY, tempreg); + struct prog_src_register tempsrc = srcreg(PROGRAM_TEMPORARY, tempreg); + tempdst.WriteMask = WRITEMASK_W; + tempsrc.Swizzle = SWIZZLE_WWWW; + + emit1(ctx, OPCODE_LG2, tempdst, scalar(inst->SrcReg[0])); + emit2(ctx, OPCODE_MUL, tempdst, tempsrc, scalar(inst->SrcReg[1])); + emit1(ctx, OPCODE_EX2, inst->DstReg, tempsrc); +} + +static void transform_SGE(struct radeon_program_transform_context* ctx, + struct prog_instruction* inst) +{ + int tempreg = radeonCompilerAllocateTemporary(ctx->compiler); + + emit2(ctx, OPCODE_ADD, dstreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[0], negate(inst->SrcReg[1])); + emit3(ctx, OPCODE_CMP, inst->DstReg, srcreg(PROGRAM_TEMPORARY, tempreg), builtin_zero, builtin_one); +} + +static void transform_SLT(struct radeon_program_transform_context* ctx, + struct prog_instruction* inst) +{ + int tempreg = radeonCompilerAllocateTemporary(ctx->compiler); + + emit2(ctx, OPCODE_ADD, dstreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[0], negate(inst->SrcReg[1])); + emit3(ctx, OPCODE_CMP, inst->DstReg, srcreg(PROGRAM_TEMPORARY, tempreg), builtin_one, builtin_zero); +} + +static void transform_SUB(struct radeon_program_transform_context* ctx, + struct prog_instruction* inst) +{ + emit2(ctx, OPCODE_ADD, inst->DstReg, inst->SrcReg[0], negate(inst->SrcReg[1])); +} + +static void transform_SWZ(struct radeon_program_transform_context* ctx, + struct prog_instruction* inst) +{ + emit1(ctx, OPCODE_MOV, inst->DstReg, inst->SrcReg[0]); +} + +static void transform_XPD(struct radeon_program_transform_context* ctx, + struct prog_instruction* inst) +{ + int tempreg = radeonCompilerAllocateTemporary(ctx->compiler); + + emit2(ctx, OPCODE_MUL, dstreg(PROGRAM_TEMPORARY, tempreg), + swizzle(inst->SrcReg[0], SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W), + swizzle(inst->SrcReg[1], SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W)); + emit3(ctx, OPCODE_MAD, inst->DstReg, + swizzle(inst->SrcReg[0], SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W), + swizzle(inst->SrcReg[1], SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W), + negate(srcreg(PROGRAM_TEMPORARY, tempreg))); +} + + +/** + * Can be used as a transformation for @ref radeonClauseLocalTransform, + * no userData necessary. + * + * Eliminates the following ALU instructions: + * ABS, DPH, FLR, POW, SGE, SLT, SUB, SWZ, XPD + * using: + * MOV, ADD, MUL, MAD, FRC, DP3, LG2, EX2, CMP + * + * @note should be applicable to R300 and R500 fragment programs. + * + * @todo add LIT here as well? + */ +GLboolean radeonTransformALU( + struct radeon_program_transform_context* ctx, + struct prog_instruction* inst, + void* unused) +{ + switch(inst->Opcode) { + case OPCODE_ABS: transform_ABS(ctx, inst); return GL_TRUE; + case OPCODE_DPH: transform_DPH(ctx, inst); return GL_TRUE; + case OPCODE_FLR: transform_FLR(ctx, inst); return GL_TRUE; + case OPCODE_POW: transform_POW(ctx, inst); return GL_TRUE; + case OPCODE_SGE: transform_SGE(ctx, inst); return GL_TRUE; + case OPCODE_SLT: transform_SLT(ctx, inst); return GL_TRUE; + case OPCODE_SUB: transform_SUB(ctx, inst); return GL_TRUE; + case OPCODE_SWZ: transform_SWZ(ctx, inst); return GL_TRUE; + case OPCODE_XPD: transform_XPD(ctx, inst); return GL_TRUE; + default: + return GL_FALSE; + } +} diff --git a/src/mesa/drivers/dri/r300/radeon_program_alu.h b/src/mesa/drivers/dri/r300/radeon_program_alu.h new file mode 100644 index 00000000000..940459624fc --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_program_alu.h @@ -0,0 +1,38 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __RADEON_PROGRAM_ALU_H_ +#define __RADEON_PROGRAM_ALU_H_ + +#include "radeon_program.h" + +GLboolean radeonTransformALU( + struct radeon_program_transform_context*, + struct prog_instruction*, + void*); + +#endif /* __RADEON_PROGRAM_ALU_H_ */