mesa: fix emit_clamp() so that we don't use an output register as temporary
author: Brian Paul <brian.paul@tungstengraphics.com>
Fri, 14 Mar 2008 19:50:01 +0000 (13:50 -0600)
committer: Brian Paul <brian.paul@tungstengraphics.com>
Fri, 14 Mar 2008 19:50:01 +0000 (13:50 -0600)
IR_CLAMP is decomposed into OPCODE_MAX+OPCODE_MIN.  Allocate a temporary
register for the intermediate value so we don't inadvertently read it back
from an output register (output registers are write-only on some GPUs).

src/mesa/shader/slang/slang_emit.c

index 2b08e7020f049b3d16826d785765d46f492dac15..3763b567055c854c8e61a71d3dd979252da8cabb 100644 (file)
@@ -677,6 +677,7 @@ static struct prog_instruction *
 emit_clamp(slang_emit_info *emitInfo, slang_ir_node *n)
 {
    struct prog_instruction *inst;
+   slang_ir_node tmpNode;
 
    assert(n->Opcode == IR_CLAMP);
    /* ch[0] = value
@@ -722,18 +723,26 @@ emit_clamp(slang_emit_info *emitInfo, slang_ir_node *n)
    emit(emitInfo, n->Children[1]);
    emit(emitInfo, n->Children[2]);
 
+   /* Some GPUs don't allow reading from output registers.  So if the
+    * dest for this clamp() is an output reg, we can't use that reg for
+    * the intermediate result.  Use a temp register instead.
+    */
+   alloc_temp_storage(emitInfo, &tmpNode, n->Store->Size);
+
    /* tmp = max(ch[0], ch[1]) */
    inst = new_instruction(emitInfo, OPCODE_MAX);
-   storage_to_dst_reg(&inst->DstReg, n->Store, n->Writemask);
+   storage_to_dst_reg(&inst->DstReg, tmpNode.Store, n->Writemask);
    storage_to_src_reg(&inst->SrcReg[0], n->Children[0]->Store);
    storage_to_src_reg(&inst->SrcReg[1], n->Children[1]->Store);
 
-   /* tmp = min(tmp, ch[2]) */
+   /* n->dest = min(tmp, ch[2]) */
    inst = new_instruction(emitInfo, OPCODE_MIN);
    storage_to_dst_reg(&inst->DstReg, n->Store, n->Writemask);
-   storage_to_src_reg(&inst->SrcReg[0], n->Store);
+   storage_to_src_reg(&inst->SrcReg[0], tmpNode.Store);
    storage_to_src_reg(&inst->SrcReg[1], n->Children[2]->Store);
 
+   free_temp_storage(emitInfo->vt, &tmpNode);
+
    return inst;
 }