From: Eric Anholt <eric@anholt.net>
Date: Thu, 6 May 2010 00:21:18 +0000 (-0700)
Subject: ir_to_mesa: Produce multiple scalar ops when required to produce vec4s.
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=12f654c63bc42d353e258cde989d9114cdde26c6;p=mesa.git

ir_to_mesa: Produce multiple scalar ops when required to produce vec4s.

Fixes the code emitted in a test shader for vec2 texcoord / vec2 tex_size.
---

diff --git a/ir_to_mesa.cpp b/ir_to_mesa.cpp
index eb55f82e27f..77ca6df73c7 100644
--- a/ir_to_mesa.cpp
+++ b/ir_to_mesa.cpp
@@ -94,6 +94,51 @@ ir_to_mesa_emit_op1(struct mbtree *tree, enum prog_opcode op,
 			      dst, src0, ir_to_mesa_undef, ir_to_mesa_undef);
 }
 
+/**
+ * Emits one scalar Mesa instruction per distinct source channel of src0.
+ *
+ * Some Mesa opcodes are scalar-only, like ARB_fp/vp: the src X channel
+ * determines the result across all channels.  Each emitted instruction
+ * reads one src0 swizzle component (replicated to all four slots) and
+ * writes only the dst channels whose swizzle selects that component.
+ */
+void
+ir_to_mesa_emit_scalar_op1(struct mbtree *tree, enum prog_opcode op,
+			   ir_to_mesa_dst_reg dst,
+			   ir_to_mesa_src_reg src0)
+{
+   int i, j;
+   int done_mask = 0;
+
+   /* Walk all four dst channels, grouping those that read the same
+    * src0 component into a single instruction.  The incoming
+    * dst.writemask is not consulted; each emit gets its own mask.
+    */
+   for (i = 0; i < 4; i++) {
+      int this_mask = (1 << i);
+      ir_to_mesa_instruction *inst;
+      ir_to_mesa_src_reg src = src0;
+
+      if (done_mask & this_mask)
+	 continue;
+
+      int src_swiz = GET_SWZ(src.swizzle, i);
+      for (j = i + 1; j < 4; j++) {
+	 if (GET_SWZ(src.swizzle, j) == src_swiz) {
+	    this_mask |= (1 << j);
+	 }
+      }
+      src.swizzle = MAKE_SWIZZLE4(src_swiz, src_swiz,
+				  src_swiz, src_swiz);
+
+      inst = ir_to_mesa_emit_op1(tree, op,
+				 dst,
+				 src);
+      inst->dst_reg.writemask = this_mask;
+      done_mask |= this_mask;
+   }
+}
+
 struct mbtree *
 ir_to_mesa_visitor::create_tree(int op,
 				ir_instruction *ir,
@@ -553,7 +598,7 @@ do_ir_to_mesa(exec_list *instructions)
       mesa_inst->DstReg.File = inst->dst_reg.file;
       mesa_inst->DstReg.Index = inst->dst_reg.index;
       mesa_inst->DstReg.CondMask = COND_TR;
-      mesa_inst->DstReg.WriteMask = WRITEMASK_XYZW;
+      mesa_inst->DstReg.WriteMask = inst->dst_reg.writemask;
       mesa_inst->SrcReg[0] = mesa_src_reg_from_ir_src_reg(inst->src_reg[0]);
       mesa_inst->SrcReg[1] = mesa_src_reg_from_ir_src_reg(inst->src_reg[1]);
       mesa_inst->SrcReg[2] = mesa_src_reg_from_ir_src_reg(inst->src_reg[2]);
diff --git a/ir_to_mesa.h b/ir_to_mesa.h
index cef27f8b79c..c8ceb4c1715 100644
--- a/ir_to_mesa.h
+++ b/ir_to_mesa.h
@@ -45,6 +45,7 @@ typedef struct ir_to_mesa_src_reg {
 typedef struct ir_to_mesa_dst_reg {
    int file; /**< PROGRAM_* from Mesa */
    int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */
+   int writemask; /**< Bitfield of WRITEMASK_[XYZW] */
 } ir_to_mesa_dst_reg;
 
 extern ir_to_mesa_src_reg ir_to_mesa_undef;
@@ -159,6 +160,11 @@ ir_to_mesa_emit_op3(struct mbtree *tree, enum prog_opcode op,
 		    ir_to_mesa_src_reg src1,
 		    ir_to_mesa_src_reg src2);
 
+void
+ir_to_mesa_emit_scalar_op1(struct mbtree *tree, enum prog_opcode op,
+			   ir_to_mesa_dst_reg dst,
+			   ir_to_mesa_src_reg src0);
+
 inline ir_to_mesa_dst_reg
 ir_to_mesa_dst_reg_from_src(ir_to_mesa_src_reg reg)
 {
@@ -166,6 +172,7 @@ ir_to_mesa_dst_reg_from_src(ir_to_mesa_src_reg reg)
 
    dst_reg.file = reg.file;
    dst_reg.index = reg.index;
+   dst_reg.writemask = WRITEMASK_XYZW;
 
    return dst_reg;
 }
diff --git a/mesa_codegen.brg b/mesa_codegen.brg
index f1f24dab84f..3191a44c210 100644
--- a/mesa_codegen.brg
+++ b/mesa_codegen.brg
@@ -184,10 +184,9 @@ vec4: dp2_vec4_vec4(vec4, vec4) 1
 
 vec4: div_vec4_vec4(vec4, vec4) 1
 {
-	/* FINISHME: Mesa RCP only uses the X channel, this node is for vec4. */
-	ir_to_mesa_emit_op1(tree, OPCODE_RCP,
-			    ir_to_mesa_dst_reg_from_src(tree->src_reg),
-			    tree->right->src_reg);
+	ir_to_mesa_emit_scalar_op1(tree, OPCODE_RCP,
+				   ir_to_mesa_dst_reg_from_src(tree->src_reg),
+				   tree->right->src_reg); /* divisor */
 
 	ir_to_mesa_emit_op2(tree, OPCODE_MUL,
 			    ir_to_mesa_dst_reg_from_src(tree->src_reg),
@@ -197,10 +196,9 @@ vec4: div_vec4_vec4(vec4, vec4) 1
 
 vec4: sqrt_vec4(vec4) 1
 {
-	/* FINISHME: Mesa RSQ only uses the X channel, this node is for vec4. */
-	ir_to_mesa_emit_op1(tree, OPCODE_RSQ,
-			    ir_to_mesa_dst_reg_from_src(tree->src_reg),
-			    tree->left->src_reg);
+	ir_to_mesa_emit_scalar_op1(tree, OPCODE_RSQ,
+				   ir_to_mesa_dst_reg_from_src(tree->src_reg),
+				   tree->left->src_reg);
 
 	ir_to_mesa_emit_op1(tree, OPCODE_RCP,
 			    ir_to_mesa_dst_reg_from_src(tree->src_reg),