From f2616e56de8a48360cae8f269727b58490555f4d Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Thu, 18 Nov 2010 11:05:32 -0800 Subject: [PATCH] glsl: Add ir_unop_sin_reduced and ir_unop_cos_reduced The operate just like ir_unop_sin and ir_unop_cos except that they expect their inputs to be limited to the range [-pi, pi]. Several GPUs require this limited range for their sine and cosine instructions, so having these as operations (along with a to-be-written lowering pass) helps this architectures. These new operations also matche the semantics of the GL_ARB_fragment_program SCS instruction. Having these as operations helps in generating GLSL IR directly from assembly fragment programs. --- src/glsl/ir.cpp | 2 + src/glsl/ir.h | 2 + src/glsl/ir_constant_expression.cpp | 2 + src/glsl/ir_validate.cpp | 2 + src/mesa/drivers/dri/i965/brw_fs.cpp | 2 + src/mesa/program/ir_to_mesa.cpp | 110 +++++++++++++++++++++++++++ 6 files changed, 120 insertions(+) diff --git a/src/glsl/ir.cpp b/src/glsl/ir.cpp index 574ef3e1832..714826343c7 100644 --- a/src/glsl/ir.cpp +++ b/src/glsl/ir.cpp @@ -239,6 +239,8 @@ static const char *const operator_strs[] = { "round_even", "sin", "cos", + "sin_reduced", + "cos_reduced", "dFdx", "dFdy", "noise", diff --git a/src/glsl/ir.h b/src/glsl/ir.h index 3ea7301f472..2b94e63cc2c 100644 --- a/src/glsl/ir.h +++ b/src/glsl/ir.h @@ -729,6 +729,8 @@ enum ir_expression_operation { /*@{*/ ir_unop_sin, ir_unop_cos, + ir_unop_sin_reduced, /**< Reduced range sin. [-pi, pi] */ + ir_unop_cos_reduced, /**< Reduced range cos. [-pi, pi] */ /*@}*/ /** diff --git a/src/glsl/ir_constant_expression.cpp b/src/glsl/ir_constant_expression.cpp index 8a54fc78cca..45860b279fb 100644 --- a/src/glsl/ir_constant_expression.cpp +++ b/src/glsl/ir_constant_expression.cpp @@ -216,6 +216,7 @@ ir_expression::constant_expression_value() break; case ir_unop_sin: + case ir_unop_sin_reduced: assert(op[0]->type->base_type == GLSL_TYPE_FLOAT); for (unsigned c = 0; c < op[0]->type->components(); c++) { data.f[c] = sinf(op[0]->value.f[c]); @@ -223,6 +224,7 @@ ir_expression::constant_expression_value() break; case ir_unop_cos: + case ir_unop_cos_reduced: assert(op[0]->type->base_type == GLSL_TYPE_FLOAT); for (unsigned c = 0; c < op[0]->type->components(); c++) { data.f[c] = cosf(op[0]->value.f[c]); diff --git a/src/glsl/ir_validate.cpp b/src/glsl/ir_validate.cpp index 77f48968b81..2a066c1a277 100644 --- a/src/glsl/ir_validate.cpp +++ b/src/glsl/ir_validate.cpp @@ -273,6 +273,8 @@ ir_validate::visit_leave(ir_expression *ir) case ir_unop_fract: case ir_unop_sin: case ir_unop_cos: + case ir_unop_sin_reduced: + case ir_unop_cos_reduced: case ir_unop_dFdx: case ir_unop_dFdy: assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT); diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index ac795e0bda1..164f89eace4 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -778,9 +778,11 @@ fs_visitor::visit(ir_expression *ir) assert(!"not reached: should be handled by ir_explog_to_explog2"); break; case ir_unop_sin: + case ir_unop_sin_reduced: emit_math(FS_OPCODE_SIN, this->result, op[0]); break; case ir_unop_cos: + case ir_unop_cos_reduced: emit_math(FS_OPCODE_COS, this->result, op[0]); break; diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp index 870fd6f25e1..ef9f692f946 100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@ -282,6 +282,10 @@ public: ir_to_mesa_src_reg src0, ir_to_mesa_src_reg src1); + void emit_scs(ir_instruction *ir, enum prog_opcode op, + ir_to_mesa_dst_reg dst, + const ir_to_mesa_src_reg &src); + GLboolean try_emit_mad(ir_expression *ir, int mul_operand); @@ -475,6 +479,10 @@ ir_to_mesa_visitor::ir_to_mesa_emit_scalar_op2(ir_instruction *ir, GLuint src0_swiz = GET_SWZ(src0.swizzle, i); GLuint src1_swiz = GET_SWZ(src1.swizzle, i); for (j = i + 1; j < 4; j++) { + /* If there is another enabled component in the destination that is + * derived from the same inputs, generate its value on this pass as + * well. + */ if (!(done_mask & (1 << j)) && GET_SWZ(src0.swizzle, j) == src0_swiz && GET_SWZ(src1.swizzle, j) == src1_swiz) { @@ -508,6 +516,102 @@ ir_to_mesa_visitor::ir_to_mesa_emit_scalar_op1(ir_instruction *ir, ir_to_mesa_emit_scalar_op2(ir, op, dst, src0, undef); } +/** + * Emit an OPCODE_SCS instruction + * + * The \c SCS opcode functions a bit differently than the other Mesa (or + * ARB_fragment_program) opcodes. Instead of splatting its result across all + * four components of the destination, it writes one value to the \c x + * component and another value to the \c y component. + * + * \param ir IR instruction being processed + * \param op Either \c OPCODE_SIN or \c OPCODE_COS depending on which + * value is desired. + * \param dst Destination register + * \param src Source register + */ +void +ir_to_mesa_visitor::emit_scs(ir_instruction *ir, enum prog_opcode op, + ir_to_mesa_dst_reg dst, + const ir_to_mesa_src_reg &src) +{ + /* Vertex programs cannot use the SCS opcode. + */ + if (this->prog->Target == GL_VERTEX_PROGRAM_ARB) { + ir_to_mesa_emit_scalar_op1(ir, op, dst, src); + return; + } + + const unsigned component = (op == OPCODE_SIN) ? 0 : 1; + const unsigned scs_mask = (1U << component); + int done_mask = ~dst.writemask; + ir_to_mesa_src_reg tmp; + + assert(op == OPCODE_SIN || op == OPCODE_COS); + + /* If there are compnents in the destination that differ from the component + * that will be written by the SCS instrution, we'll need a temporary. + */ + if (scs_mask != unsigned(dst.writemask)) { + tmp = get_temp(glsl_type::vec4_type); + } + + for (unsigned i = 0; i < 4; i++) { + unsigned this_mask = (1U << i); + ir_to_mesa_src_reg src0 = src; + + if ((done_mask & this_mask) != 0) + continue; + + /* The source swizzle specified which component of the source generates + * sine / cosine for the current component in the destination. The SCS + * instruction requires that this value be swizzle to the X component. + * Replace the current swizzle with a swizzle that puts the source in + * the X component. + */ + unsigned src0_swiz = GET_SWZ(src.swizzle, i); + + src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz, + src0_swiz, src0_swiz); + for (unsigned j = i + 1; j < 4; j++) { + /* If there is another enabled component in the destination that is + * derived from the same inputs, generate its value on this pass as + * well. + */ + if (!(done_mask & (1 << j)) && + GET_SWZ(src0.swizzle, j) == src0_swiz) { + this_mask |= (1 << j); + } + } + + if (this_mask != scs_mask) { + ir_to_mesa_instruction *inst; + ir_to_mesa_dst_reg tmp_dst = ir_to_mesa_dst_reg_from_src(tmp); + + /* Emit the SCS instruction. + */ + inst = ir_to_mesa_emit_op1(ir, OPCODE_SCS, tmp_dst, src0); + inst->dst_reg.writemask = scs_mask; + + /* Move the result of the SCS instruction to the desired location in + * the destination. + */ + tmp.swizzle = MAKE_SWIZZLE4(component, component, + component, component); + inst = ir_to_mesa_emit_op1(ir, OPCODE_SCS, dst, tmp); + inst->dst_reg.writemask = this_mask; + } else { + /* Emit the SCS instruction to write directly to the destination. + */ + ir_to_mesa_instruction *inst = + ir_to_mesa_emit_op1(ir, OPCODE_SCS, dst, src0); + inst->dst_reg.writemask = scs_mask; + } + + done_mask |= this_mask; + } +} + struct ir_to_mesa_src_reg ir_to_mesa_visitor::src_reg_for_float(float val) { @@ -942,6 +1046,12 @@ ir_to_mesa_visitor::visit(ir_expression *ir) case ir_unop_cos: ir_to_mesa_emit_scalar_op1(ir, OPCODE_COS, result_dst, op[0]); break; + case ir_unop_sin_reduced: + emit_scs(ir, OPCODE_SIN, result_dst, op[0]); + break; + case ir_unop_cos_reduced: + emit_scs(ir, OPCODE_COS, result_dst, op[0]); + break; case ir_unop_dFdx: ir_to_mesa_emit_op1(ir, OPCODE_DDX, result_dst, op[0]); -- 2.30.2