From: Tom Stellard Date: Mon, 5 Sep 2011 13:57:36 +0000 (-0700) Subject: r300/compiler: Implement ROUND X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=3d32e589879806297258e36ea80aae5044293ca3;p=mesa.git r300/compiler: Implement ROUND According to the GLSL spec, the implementor can decide which way to round when the fraction is .5. The r300 compiler will round up (towards positive infinity), since it computes floor(x + .5). --- diff --git a/src/gallium/drivers/r300/compiler/radeon_opcodes.c b/src/gallium/drivers/r300/compiler/radeon_opcodes.c index afd78ad79dd..527db9a1f69 100644 --- a/src/gallium/drivers/r300/compiler/radeon_opcodes.c +++ b/src/gallium/drivers/r300/compiler/radeon_opcodes.c @@ -245,6 +245,13 @@ struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = { .HasDstReg = 1, .IsStandardScalar = 1 }, + { + .Opcode = RC_OPCODE_ROUND, + .Name = "ROUND", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsComponentwise = 1 + }, { .Opcode = RC_OPCODE_RSQ, .Name = "RSQ", diff --git a/src/gallium/drivers/r300/compiler/radeon_opcodes.h b/src/gallium/drivers/r300/compiler/radeon_opcodes.h index b5868820611..0b881c2bfe2 100644 --- a/src/gallium/drivers/r300/compiler/radeon_opcodes.h +++ b/src/gallium/drivers/r300/compiler/radeon_opcodes.h @@ -133,6 +133,9 @@ typedef enum { /** scalar instruction: dst = 1 / src0.x */ RC_OPCODE_RCP, + /** vec4 instruction: dst.c = floor(src0.c + 0.5) */ + RC_OPCODE_ROUND, + /** scalar instruction: dst = 1 / sqrt(src0.x) */ RC_OPCODE_RSQ, diff --git a/src/gallium/drivers/r300/compiler/radeon_program_alu.c b/src/gallium/drivers/r300/compiler/radeon_program_alu.c index e273bc40c26..dd1dfb344d4 100644 --- a/src/gallium/drivers/r300/compiler/radeon_program_alu.c +++ b/src/gallium/drivers/r300/compiler/radeon_program_alu.c @@ -104,6 +104,13 @@ static const struct rc_src_register builtin_one = { .Index = 0, .Swizzle = RC_SWIZZLE_1111 }; + +static const struct rc_src_register builtin_half = { + .File = RC_FILE_NONE, + .Index = 0, + .Swizzle = RC_SWIZZLE_HHHH +}; + static const struct rc_src_register 
srcreg_undefined = { .File = RC_FILE_NONE, .Index = 0, @@ -416,6 +423,43 @@ static void transform_POW(struct radeon_compiler* c, rc_remove_instruction(inst); } +/* dst = ROUND(src) : + * add = src + .5 + * frac = FRC(add) + * dst = add - frac + * + * According to the GLSL spec, the implementor can decide which way to round + * when the fraction is .5. We round up for .5: dst = floor(src + .5). + * + */ +static void transform_ROUND(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + unsigned int mask = inst->U.I.DstReg.WriteMask; + unsigned int frac_index, add_index; + struct rc_dst_register frac_dst, add_dst; + struct rc_src_register frac_src, add_src; + + /* add = src + .5 */ + add_index = rc_find_free_temporary(c); + add_dst = dstregtmpmask(add_index, mask); + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, add_dst, inst->U.I.SrcReg[0], + builtin_half); + add_src = srcreg(RC_FILE_TEMPORARY, add_dst.Index); + + + /* frac = FRC(add) */ + frac_index = rc_find_free_temporary(c); + frac_dst = dstregtmpmask(frac_index, mask); + emit1(c, inst->Prev, RC_OPCODE_FRC, 0, frac_dst, add_src); + frac_src = srcreg(RC_FILE_TEMPORARY, frac_dst.Index); + + /* dst = add - frac */ + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, inst->U.I.DstReg, + add_src, negate(frac_src)); + rc_remove_instruction(inst); +} + static void transform_RSQ(struct radeon_compiler* c, struct rc_instruction* inst) { @@ -599,6 +643,7 @@ int radeonTransformALU( case RC_OPCODE_LIT: transform_LIT(c, inst); return 1; case RC_OPCODE_LRP: transform_LRP(c, inst); return 1; case RC_OPCODE_POW: transform_POW(c, inst); return 1; + case RC_OPCODE_ROUND: transform_ROUND(c, inst); return 1; case RC_OPCODE_RSQ: transform_RSQ(c, inst); return 1; case RC_OPCODE_SEQ: transform_SEQ(c, inst); return 1; case RC_OPCODE_SFL: transform_SFL(c, inst); return 1; diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c index 07a3f3caee7..4cb08b5836b 100644 --- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c +++ 
b/src/gallium/drivers/r300/r300_tgsi_to_rc.c @@ -57,7 +57,7 @@ static unsigned translate_opcode(unsigned opcode) case TGSI_OPCODE_FRC: return RC_OPCODE_FRC; case TGSI_OPCODE_CLAMP: return RC_OPCODE_CLAMP; case TGSI_OPCODE_FLR: return RC_OPCODE_FLR; - /* case TGSI_OPCODE_ROUND: return RC_OPCODE_ROUND; */ + case TGSI_OPCODE_ROUND: return RC_OPCODE_ROUND; case TGSI_OPCODE_EX2: return RC_OPCODE_EX2; case TGSI_OPCODE_LG2: return RC_OPCODE_LG2; case TGSI_OPCODE_POW: return RC_OPCODE_POW;