From: Eric Anholt
Date: Thu, 6 May 2010 00:21:18 +0000 (-0700)
Subject: ir_to_mesa: Produce multiple scalar ops when required to produce vec4s.
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=12f654c63bc42d353e258cde989d9114cdde26c6;p=mesa.git

ir_to_mesa: Produce multiple scalar ops when required to produce vec4s.

Fixes the code emitted in a test shader for vec2 texcoord / vec2 tex_size.
---

diff --git a/ir_to_mesa.cpp b/ir_to_mesa.cpp
index eb55f82e27f..77ca6df73c7 100644
--- a/ir_to_mesa.cpp
+++ b/ir_to_mesa.cpp
@@ -94,6 +94,51 @@ ir_to_mesa_emit_op1(struct mbtree *tree, enum prog_opcode op,
                               dst, src0, ir_to_mesa_undef, ir_to_mesa_undef);
 }
 
+/**
+ * Emits Mesa scalar opcodes to produce unique answers across channels.
+ *
+ * Some Mesa opcodes are scalar-only, like ARB_fp/vp: the src X channel
+ * determines the result across all channels.  So to do a vec4 of this
+ * operation, emit one scalar op per distinct source channel, writing
+ * only the channels enabled in dst.writemask.
+ */
+void
+ir_to_mesa_emit_scalar_op1(struct mbtree *tree, enum prog_opcode op,
+                           ir_to_mesa_dst_reg dst,
+                           ir_to_mesa_src_reg src0)
+{
+   int i, j;
+   int done_mask = ~dst.writemask;
+
+   /* A scalar opcode splats the result for its src X channel to every
+    * written channel.  Emit one instruction per distinct source channel,
+    * skipping channels the caller masked out or that are already done.
+    */
+   for (i = 0; i < 4; i++) {
+      int this_mask = (1 << i);
+      ir_to_mesa_instruction *inst;
+      ir_to_mesa_src_reg src = src0;
+
+      if (done_mask & this_mask)
+         continue;
+
+      int src_swiz = GET_SWZ(src.swizzle, i);
+      for (j = i + 1; j < 4; j++) {
+         if (!(done_mask & (1 << j)) && GET_SWZ(src.swizzle, j) == src_swiz) {
+            this_mask |= (1 << j);
+         }
+      }
+      src.swizzle = MAKE_SWIZZLE4(src_swiz, src_swiz,
+                                  src_swiz, src_swiz);
+
+      inst = ir_to_mesa_emit_op1(tree, op,
+                                 dst,
+                                 src);
+      inst->dst_reg.writemask = this_mask;
+      done_mask |= this_mask;
+   }
+}
+
 struct mbtree *
 ir_to_mesa_visitor::create_tree(int op,
                                 ir_instruction *ir,
@@ -553,7 +598,7 @@ do_ir_to_mesa(exec_list *instructions)
       mesa_inst->DstReg.File = inst->dst_reg.file;
       mesa_inst->DstReg.Index = inst->dst_reg.index;
       mesa_inst->DstReg.CondMask = COND_TR;
-      mesa_inst->DstReg.WriteMask = WRITEMASK_XYZW;
+      mesa_inst->DstReg.WriteMask = inst->dst_reg.writemask;
       mesa_inst->SrcReg[0] = mesa_src_reg_from_ir_src_reg(inst->src_reg[0]);
       mesa_inst->SrcReg[1] = mesa_src_reg_from_ir_src_reg(inst->src_reg[1]);
       mesa_inst->SrcReg[2] = mesa_src_reg_from_ir_src_reg(inst->src_reg[2]);
diff --git a/ir_to_mesa.h b/ir_to_mesa.h
index cef27f8b79c..c8ceb4c1715 100644
--- a/ir_to_mesa.h
+++ b/ir_to_mesa.h
@@ -45,6 +45,7 @@ typedef struct ir_to_mesa_src_reg {
 typedef struct ir_to_mesa_dst_reg {
    int file; /**< PROGRAM_* from Mesa */
    int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */
+   int writemask; /**< Bitfield of WRITEMASK_[XYZW] */
 } ir_to_mesa_dst_reg;
 
 extern ir_to_mesa_src_reg ir_to_mesa_undef;
@@ -159,6 +160,11 @@ ir_to_mesa_emit_op3(struct mbtree *tree, enum prog_opcode op,
                     ir_to_mesa_src_reg src1,
                     ir_to_mesa_src_reg src2);
 
+void
+ir_to_mesa_emit_scalar_op1(struct mbtree *tree, enum prog_opcode op,
+                           ir_to_mesa_dst_reg dst,
+                           ir_to_mesa_src_reg src0);
+
 inline ir_to_mesa_dst_reg
 ir_to_mesa_dst_reg_from_src(ir_to_mesa_src_reg reg)
 {
@@ -166,6 +172,7 @@ ir_to_mesa_dst_reg_from_src(ir_to_mesa_src_reg reg)
 
    dst_reg.file = reg.file;
    dst_reg.index = reg.index;
+   dst_reg.writemask = WRITEMASK_XYZW;
 
    return dst_reg;
 }
diff --git a/mesa_codegen.brg b/mesa_codegen.brg
index f1f24dab84f..3191a44c210 100644
--- a/mesa_codegen.brg
+++ b/mesa_codegen.brg
@@ -184,10 +184,10 @@ vec4: dp2_vec4_vec4(vec4, vec4) 1
 
 vec4: div_vec4_vec4(vec4, vec4) 1
 {
-        /* FINISHME: Mesa RCP only uses the X channel, this node is for vec4. */
-        ir_to_mesa_emit_op1(tree, OPCODE_RCP,
-                            ir_to_mesa_dst_reg_from_src(tree->src_reg),
-                            tree->right->src_reg);
+        /* Reciprocal of the divisor (right operand); the MUL below finishes the divide. */
+        ir_to_mesa_emit_scalar_op1(tree, OPCODE_RCP,
+                                   ir_to_mesa_dst_reg_from_src(tree->src_reg),
+                                   tree->right->src_reg);
 
         ir_to_mesa_emit_op2(tree, OPCODE_MUL,
                             ir_to_mesa_dst_reg_from_src(tree->src_reg),
@@ -197,10 +197,9 @@ vec4: div_vec4_vec4(vec4, vec4) 1
 
 vec4: sqrt_vec4(vec4) 1
 {
-        /* FINISHME: Mesa RSQ only uses the X channel, this node is for vec4. */
-        ir_to_mesa_emit_op1(tree, OPCODE_RSQ,
-                            ir_to_mesa_dst_reg_from_src(tree->src_reg),
-                            tree->left->src_reg);
+        ir_to_mesa_emit_scalar_op1(tree, OPCODE_RSQ,
+                                   ir_to_mesa_dst_reg_from_src(tree->src_reg),
+                                   tree->left->src_reg);
 
         ir_to_mesa_emit_op1(tree, OPCODE_RCP,
                             ir_to_mesa_dst_reg_from_src(tree->src_reg),