r300g: copy the compiler from r300c
author Marek Olšák <maraeo@gmail.com>
Tue, 26 Jul 2011 19:15:05 +0000 (21:15 +0200)
committer Marek Olšák <maraeo@gmail.com>
Tue, 26 Jul 2011 20:35:49 +0000 (22:35 +0200)
What a beast.

r300g doesn't depend on files from r300c anymore, so r300c is now left
to its own fate. BTW 'make test' can be invoked from the
src/gallium/drivers/r300 directory to run some compiler unit tests.

69 files changed:
src/gallium/drivers/r300/Makefile
src/gallium/drivers/r300/SConscript
src/gallium/drivers/r300/compiler/memory_pool.c [new file with mode: 0644]
src/gallium/drivers/r300/compiler/memory_pool.h [new file with mode: 0644]
src/gallium/drivers/r300/compiler/r300_fragprog.c [new file with mode: 0644]
src/gallium/drivers/r300/compiler/r300_fragprog.h [new file with mode: 0644]
src/gallium/drivers/r300/compiler/r300_fragprog_emit.c [new file with mode: 0644]
src/gallium/drivers/r300/compiler/r300_fragprog_swizzle.c [new file with mode: 0644]
src/gallium/drivers/r300/compiler/r300_fragprog_swizzle.h [new file with mode: 0644]
src/gallium/drivers/r300/compiler/r3xx_fragprog.c [new file with mode: 0644]
src/gallium/drivers/r300/compiler/r3xx_vertprog.c [new file with mode: 0644]
src/gallium/drivers/r300/compiler/r3xx_vertprog_dump.c [new file with mode: 0644]
src/gallium/drivers/r300/compiler/r500_fragprog.c [new file with mode: 0644]
src/gallium/drivers/r300/compiler/r500_fragprog.h [new file with mode: 0644]
src/gallium/drivers/r300/compiler/r500_fragprog_emit.c [new file with mode: 0644]
src/gallium/drivers/r300/compiler/radeon_code.c [new file with mode: 0644]
src/gallium/drivers/r300/compiler/radeon_code.h [new file with mode: 0644]
src/gallium/drivers/r300/compiler/radeon_compiler.c [new file with mode: 0644]
src/gallium/drivers/r300/compiler/radeon_compiler.h [new file with mode: 0644]
src/gallium/drivers/r300/compiler/radeon_compiler_util.c [new file with mode: 0644]
src/gallium/drivers/r300/compiler/radeon_compiler_util.h [new file with mode: 0644]
src/gallium/drivers/r300/compiler/radeon_dataflow.c [new file with mode: 0644]
src/gallium/drivers/r300/compiler/radeon_dataflow.h [new file with mode: 0644]
src/gallium/drivers/r300/compiler/radeon_dataflow_deadcode.c [new file with mode: 0644]
src/gallium/drivers/r300/compiler/radeon_dataflow_swizzles.c [new file with mode: 0644]
src/gallium/drivers/r300/compiler/radeon_emulate_branches.c [new file with mode: 0644]
src/gallium/drivers/r300/compiler/radeon_emulate_branches.h [new file with mode: 0644]
src/gallium/drivers/r300/compiler/radeon_emulate_loops.c [new file with mode: 0644]
src/gallium/drivers/r300/compiler/radeon_emulate_loops.h [new file with mode: 0644]
src/gallium/drivers/r300/compiler/radeon_list.c [new file with mode: 0644]
src/gallium/drivers/r300/compiler/radeon_list.h [new file with mode: 0644]
src/gallium/drivers/r300/compiler/radeon_opcodes.c [new file with mode: 0644]
src/gallium/drivers/r300/compiler/radeon_opcodes.h [new file with mode: 0644]
src/gallium/drivers/r300/compiler/radeon_optimize.c [new file with mode: 0644]
src/gallium/drivers/r300/compiler/radeon_pair_dead_sources.c [new file with mode: 0644]
src/gallium/drivers/r300/compiler/radeon_pair_regalloc.c [new file with mode: 0644]
src/gallium/drivers/r300/compiler/radeon_pair_schedule.c [new file with mode: 0644]
src/gallium/drivers/r300/compiler/radeon_pair_translate.c [new file with mode: 0644]
src/gallium/drivers/r300/compiler/radeon_program.c [new file with mode: 0644]
src/gallium/drivers/r300/compiler/radeon_program.h [new file with mode: 0644]
src/gallium/drivers/r300/compiler/radeon_program_alu.c [new file with mode: 0644]
src/gallium/drivers/r300/compiler/radeon_program_alu.h [new file with mode: 0644]
src/gallium/drivers/r300/compiler/radeon_program_constants.h [new file with mode: 0644]
src/gallium/drivers/r300/compiler/radeon_program_pair.c [new file with mode: 0644]
src/gallium/drivers/r300/compiler/radeon_program_pair.h [new file with mode: 0644]
src/gallium/drivers/r300/compiler/radeon_program_print.c [new file with mode: 0644]
src/gallium/drivers/r300/compiler/radeon_program_tex.c [new file with mode: 0644]
src/gallium/drivers/r300/compiler/radeon_program_tex.h [new file with mode: 0644]
src/gallium/drivers/r300/compiler/radeon_remove_constants.c [new file with mode: 0644]
src/gallium/drivers/r300/compiler/radeon_remove_constants.h [new file with mode: 0644]
src/gallium/drivers/r300/compiler/radeon_rename_regs.c [new file with mode: 0644]
src/gallium/drivers/r300/compiler/radeon_rename_regs.h [new file with mode: 0644]
src/gallium/drivers/r300/compiler/radeon_swizzle.h [new file with mode: 0644]
src/gallium/drivers/r300/compiler/radeon_variable.c [new file with mode: 0644]
src/gallium/drivers/r300/compiler/radeon_variable.h [new file with mode: 0644]
src/gallium/drivers/r300/compiler/tests/.gitignore [new file with mode: 0644]
src/gallium/drivers/r300/compiler/tests/Makefile [new file with mode: 0644]
src/gallium/drivers/r300/compiler/tests/radeon_compiler_util_tests.c [new file with mode: 0644]
src/gallium/drivers/r300/compiler/tests/rc_test_helpers.c [new file with mode: 0644]
src/gallium/drivers/r300/compiler/tests/rc_test_helpers.h [new file with mode: 0644]
src/gallium/drivers/r300/compiler/tests/unit_test.c [new file with mode: 0644]
src/gallium/drivers/r300/compiler/tests/unit_test.h [new file with mode: 0644]
src/gallium/drivers/r300/r300_emit.h
src/gallium/drivers/r300/r300_fs.c
src/gallium/drivers/r300/r300_fs.h
src/gallium/drivers/r300/r300_reg.h
src/gallium/drivers/r300/r300_tgsi_to_rc.c
src/gallium/drivers/r300/r300_vs.c
src/gallium/drivers/r300/r300_vs.h

index 4088216adcbf10ef8ba074823e73e2be6e6bf50c..4f021276a8f6d1575e3549e328406a37c07305a2 100644 (file)
@@ -26,19 +26,51 @@ C_SOURCES = \
        r300_texture.c \
        r300_texture_desc.c \
        r300_tgsi_to_rc.c \
-       r300_transfer.c
+       r300_transfer.c \
+       \
+       compiler/radeon_code.c \
+       compiler/radeon_compiler.c \
+       compiler/radeon_compiler_util.c \
+       compiler/radeon_emulate_branches.c \
+       compiler/radeon_emulate_loops.c \
+       compiler/radeon_program.c \
+       compiler/radeon_program_print.c \
+       compiler/radeon_opcodes.c \
+       compiler/radeon_program_alu.c \
+       compiler/radeon_program_pair.c \
+       compiler/radeon_program_tex.c \
+       compiler/radeon_pair_translate.c \
+       compiler/radeon_pair_schedule.c \
+       compiler/radeon_pair_regalloc.c \
+       compiler/radeon_pair_dead_sources.c \
+       compiler/radeon_dataflow.c \
+       compiler/radeon_dataflow_deadcode.c \
+       compiler/radeon_dataflow_swizzles.c \
+       compiler/radeon_list.c \
+       compiler/radeon_optimize.c \
+       compiler/radeon_remove_constants.c \
+       compiler/radeon_rename_regs.c \
+       compiler/radeon_variable.c \
+       compiler/r3xx_fragprog.c \
+       compiler/r300_fragprog.c \
+       compiler/r300_fragprog_swizzle.c \
+       compiler/r300_fragprog_emit.c \
+       compiler/r500_fragprog.c \
+       compiler/r500_fragprog_emit.c \
+       compiler/r3xx_vertprog.c \
+       compiler/r3xx_vertprog_dump.c \
+       compiler/memory_pool.c \
+       \
+       $(TOP)/src/glsl/ralloc.c \
+       $(TOP)/src/mesa/program/register_allocate.c
 
-LIBRARY_INCLUDES = \
-       -I$(TOP)/src/mesa/drivers/dri/r300/compiler \
-       -I$(TOP)/include
-
-COMPILER_ARCHIVE = $(TOP)/src/mesa/drivers/dri/r300/compiler/libr300compiler.a
 
-EXTRA_OBJECTS = \
-       $(COMPILER_ARCHIVE)
+LIBRARY_INCLUDES = \
+       -I$(TOP)/include \
+       -I$(TOP)/src/mesa \
+       -I$(TOP)/src/glsl
 
 include ../../Makefile.template
 
-.PHONY: $(COMPILER_ARCHIVE)
-$(COMPILER_ARCHIVE):
-       $(MAKE) -C $(TOP)/src/mesa/drivers/dri/r300/compiler
+test: default
+       @$(MAKE) -s -C compiler/tests/
index 3af157a7956be92435d6974de7a1a61b7f5287f6..7ffd1c27c96bb8519d3fbf195828528b22a2e3d6 100644 (file)
@@ -1,13 +1,11 @@
 Import('*')
 
-r300compiler = SConscript('#/src/mesa/drivers/dri/r300/compiler/SConscript')
-
 env = env.Clone()
-# add the paths for r300compiler
 env.Append(CPPPATH = [
-    '#/src/mesa/drivers/dri/r300/compiler', 
     '#/include', 
     '#/src/mesa',
+    '#/src/glsl',
+    '#/src/mapi',
 ])
 
 r300 = env.ConvenienceLibrary(
@@ -36,7 +34,41 @@ r300 = env.ConvenienceLibrary(
         'r300_texture_desc.c',
         'r300_tgsi_to_rc.c',
         'r300_transfer.c',
-    ] + r300compiler) + r300compiler
+        'compiler/radeon_code.c',
+        'compiler/radeon_compiler.c',
+        'compiler/radeon_compiler_util.c',
+        'compiler/radeon_program.c',
+        'compiler/radeon_program_print.c',
+        'compiler/radeon_opcodes.c',
+        'compiler/radeon_program_alu.c',
+        'compiler/radeon_program_pair.c',
+        'compiler/radeon_program_tex.c',
+        'compiler/radeon_pair_translate.c',
+        'compiler/radeon_pair_schedule.c',
+        'compiler/radeon_pair_regalloc.c',
+        'compiler/radeon_pair_dead_sources.c',
+        'compiler/radeon_optimize.c',
+        'compiler/radeon_remove_constants.c',
+        'compiler/radeon_rename_regs.c',
+        'compiler/radeon_emulate_branches.c',
+        'compiler/radeon_emulate_loops.c',
+        'compiler/radeon_dataflow.c',
+        'compiler/radeon_dataflow_deadcode.c',
+        'compiler/radeon_dataflow_swizzles.c',
+        'compiler/radeon_variable.c',
+        'compiler/radeon_list.c',
+        'compiler/r3xx_fragprog.c',
+        'compiler/r300_fragprog.c',
+        'compiler/r300_fragprog_swizzle.c',
+        'compiler/r300_fragprog_emit.c',
+        'compiler/r500_fragprog.c',
+        'compiler/r500_fragprog_emit.c',
+        'compiler/r3xx_vertprog.c',
+        'compiler/r3xx_vertprog_dump.c',
+        'compiler/memory_pool.c',
+        '#/src/glsl/ralloc.c',
+        '#/src/mesa/program/register_allocate.c'
+    ])
 
 env.Alias('r300', r300)
 
diff --git a/src/gallium/drivers/r300/compiler/memory_pool.c b/src/gallium/drivers/r300/compiler/memory_pool.c
new file mode 100644 (file)
index 0000000..ddcdddf
--- /dev/null
@@ -0,0 +1,97 @@
+/*
+ * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "memory_pool.h"
+
+#include <assert.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+
+#define POOL_LARGE_ALLOC 4096
+#define POOL_ALIGN 8
+
+
+/* Header placed at the start of every malloc'ed chunk owned by a pool;
+ * the memory handed out to users begins directly behind it. */
+struct memory_block {
+       struct memory_block * next; /* next chunk in the pool's singly-linked list */
+};
+
+/* Put a pool into its empty state by zeroing the whole structure
+ * (no chunks, no current chunk, nothing accounted). */
+void memory_pool_init(struct memory_pool * pool)
+{
+       memset(pool, 0, sizeof(*pool));
+}
+
+
+/**
+ * Release every chunk owned by the pool (regular refill chunks and
+ * oversized allocations alike) and return the pool to its empty state.
+ *
+ * All pointers previously handed out by memory_pool_malloc() become
+ * invalid. The bookkeeping fields are cleared as well, so a destroyed
+ * pool can no longer hand out bytes from an already freed chunk; it may
+ * be reused as if it had just been initialised.
+ */
+void memory_pool_destroy(struct memory_pool * pool)
+{
+       struct memory_block * block = pool->blocks;
+
+       while (block) {
+               struct memory_block * next = block->next;
+               free(block);
+               block = next;
+       }
+
+       /* head/end pointed into a chunk that was just freed: reset them
+        * (and the size accounting) so later use cannot touch freed memory. */
+       memset(pool, 0, sizeof(*pool));
+}
+
+/**
+ * Start a new chunk for small allocations.
+ *
+ * The first chunk is 2 * POOL_LARGE_ALLOC bytes; every later chunk is as
+ * large as everything refilled so far, so the pool doubles each time.
+ * Space left over in the previous chunk is abandoned (the chunk stays on
+ * the block list and is released by memory_pool_destroy()).
+ *
+ * \return 1 on success, 0 if malloc() failed (the pool is left untouched).
+ */
+static int refill_pool(struct memory_pool * pool)
+{
+       unsigned int blocksize = pool->total_allocated;
+       struct memory_block * newblock;
+
+       if (!blocksize)
+               blocksize = 2*POOL_LARGE_ALLOC;
+
+       newblock = malloc(blocksize);
+       if (!newblock)
+               return 0;
+
+       newblock->next = pool->blocks;
+       pool->blocks = newblock;
+
+       /* The usable memory starts right behind the list header. */
+       pool->head = (unsigned char*)(newblock + 1);
+       pool->end = ((unsigned char*)newblock) + blocksize;
+       pool->total_allocated += blocksize;
+
+       return 1;
+}
+
+
+/**
+ * Allocate \p bytes from the pool.
+ *
+ * Requests below POOL_LARGE_ALLOC are carved out of the current chunk and
+ * the next free position is rounded up to POOL_ALIGN; larger requests get
+ * a dedicated malloc()ed block that is only linked into the pool's list.
+ * Individual allocations cannot be freed; memory_pool_destroy() releases
+ * everything at once.
+ *
+ * \return pointer to uninitialised memory, or NULL if the system is out
+ * of memory.
+ */
+void * memory_pool_malloc(struct memory_pool * pool, unsigned int bytes)
+{
+       if (bytes < POOL_LARGE_ALLOC) {
+               unsigned char * ptr;
+               uintptr_t next;
+
+               /* Compare sizes instead of forming head + bytes first: that
+                * sum may lie outside the chunk (or be based on a NULL head),
+                * which is not a valid pointer computation. */
+               if (!pool->head || (size_t)(pool->end - pool->head) < bytes) {
+                       if (!refill_pool(pool))
+                               return NULL;
+               }
+
+               assert((size_t)(pool->end - pool->head) >= bytes);
+
+               ptr = pool->head;
+
+               /* Round the next free position up to POOL_ALIGN. uintptr_t is
+                * needed here: unsigned long is narrower than a pointer on
+                * LLP64 targets (64-bit Windows) and would truncate the address. */
+               next = ((uintptr_t)(ptr + bytes) + POOL_ALIGN - 1) & ~(uintptr_t)(POOL_ALIGN - 1);
+               pool->head = (unsigned char*)next;
+
+               /* Keep head inside the chunk so the size check above stays valid
+                * even if the chunk end is not a multiple of POOL_ALIGN. */
+               if (pool->head > pool->end)
+                       pool->head = pool->end;
+
+               return ptr;
+       } else {
+               struct memory_block * block;
+               size_t total = (size_t)bytes + sizeof(struct memory_block);
+
+               /* Guard against wrap-around where size_t is as narrow as
+                * unsigned int. */
+               if (total < bytes)
+                       return NULL;
+
+               block = malloc(total);
+               if (!block)
+                       return NULL;
+
+               block->next = pool->blocks;
+               pool->blocks = block;
+
+               return (block + 1);
+       }
+}
+
+
diff --git a/src/gallium/drivers/r300/compiler/memory_pool.h b/src/gallium/drivers/r300/compiler/memory_pool.h
new file mode 100644 (file)
index 0000000..42344d0
--- /dev/null
@@ -0,0 +1,80 @@
+/*
+ * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#ifndef MEMORY_POOL_H
+#define MEMORY_POOL_H
+
+struct memory_block;
+
+/**
+ * Provides a pool of memory that can quickly be allocated from, at the
+ * cost of being unable to explicitly free one of the allocated blocks.
+ * Instead, the entire pool can be freed at once.
+ *
+ * The idea is to allow one to quickly allocate a flexible amount of
+ * memory during operations like shader compilation while avoiding
+ * reference counting headaches.
+ */
+struct memory_pool {
+       /* Next free byte of the current chunk; returned by the next small allocation. */
+       unsigned char * head;
+       /* One past the last byte of the current chunk. */
+       unsigned char * end;
+       /* Sum of the sizes of all refill chunks so far (oversized allocations
+        * are not counted); also used as the size of the next refill chunk. */
+       unsigned int total_allocated;
+       /* List of every malloc'ed block owned by the pool, walked and freed
+        * by memory_pool_destroy(). */
+       struct memory_block * blocks;
+};
+
+
+/* Zeroes the pool structure; call before the first allocation. */
+void memory_pool_init(struct memory_pool * pool);
+/* Frees every block on the pool's block list. */
+void memory_pool_destroy(struct memory_pool * pool);
+/* Returns `bytes` bytes of pool-owned memory; it cannot be freed individually. */
+void * memory_pool_malloc(struct memory_pool * pool, unsigned int bytes);
+
+
+/**
+ * Generic helper for growing an array that has separate size/count
+ * and reserved counters to accommodate up to num new elements.
+ *
+ *  type * Array;
+ *  unsigned int Size;
+ *  unsigned int Reserved;
+ *
+ * memory_pool_array_reserve(pool, type, Array, Size, Reserved, k);
+ * assert(Size + k <= Reserved);
+ *
+ * When the array has to grow, the first Size elements are copied into
+ * new pool storage; the old storage is not released (pool memory cannot
+ * be freed individually).
+ *
+ * \note Size is not changed by this macro.
+ *
+ * \note This header does not include <string.h>; the including file has
+ * to provide memcpy().
+ *
+ * \warning Array, Size, Reserved have to be lvalues and may be evaluated
+ * several times.
+ */
+#define memory_pool_array_reserve(pool, type, array, size, reserved, num) do { \
+       unsigned int _num = (num); \
+       unsigned int _needed = (size) + _num; \
+       if (_needed > (reserved)) { \
+               unsigned int newreserve = (reserved) * 2; \
+               type * newarray; \
+               /* Doubling is not enough when size + num is larger still. */ \
+               if (newreserve < _needed) \
+                       newreserve = (size) + 4 * _num; /* arbitrary heuristic */ \
+               newarray = memory_pool_malloc((pool), newreserve * sizeof(type)); \
+               /* Nothing to copy (and array may be NULL) while size is 0. */ \
+               if ((size) > 0) \
+                       memcpy(newarray, (array), (size) * sizeof(type)); \
+               (array) = newarray; \
+               (reserved) = newreserve; \
+       } \
+} while(0)
+
+#endif /* MEMORY_POOL_H */
diff --git a/src/gallium/drivers/r300/compiler/r300_fragprog.c b/src/gallium/drivers/r300/compiler/r300_fragprog.c
new file mode 100644 (file)
index 0000000..deba9ca
--- /dev/null
@@ -0,0 +1,338 @@
+/*
+ * Copyright (C) 2005 Ben Skeggs.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "r300_fragprog.h"
+
+#include <stdio.h>
+
+#include "../r300_reg.h"
+
+/* Write the mnemonic of the presubtract operation selected by bits 21-22
+ * (mask 0x600000) of an ALU instruction word into out. */
+static void presub_string(char out[10], unsigned int inst)
+{
+       const unsigned int presub_op = inst & 0x600000;
+       const char * mnemonic = NULL;
+
+       if (presub_op == R300_ALU_SRCP_1_MINUS_2_SRC0)
+               mnemonic = "bias";
+       else if (presub_op == R300_ALU_SRCP_SRC1_MINUS_SRC0)
+               mnemonic = "sub";
+       else if (presub_op == R300_ALU_SRCP_SRC1_PLUS_SRC0)
+               mnemonic = "add";
+       else if (presub_op == R300_ALU_SRCP_1_MINUS_SRC0)
+               mnemonic = "inv ";
+
+       /* out is left untouched when none of the operations matches. */
+       if (mnemonic)
+               sprintf(out, "%s", mnemonic);
+}
+
+/* Returns 1 << 5 when any bit of `bit` is set in r400_ext_addr, otherwise 0;
+ * the callers OR the result into a 5-bit register index. */
+static int get_msb(unsigned int bit, unsigned int r400_ext_addr)
+{
+       if ((r400_ext_addr & bit) == 0)
+               return 0;
+
+       return 1 << 5;
+}
+
+/**
+ * Print a human-readable listing of the compiled fragment program
+ * (compiler->code->code.r300) to stderr.
+ *
+ * For each node the TEX instructions (if any) and then the ALU
+ * instructions are printed: first the source/destination addresses, then
+ * the decoded argument selectors together with the raw instruction words.
+ *
+ * \param c    cast to struct r300_fragment_program_compiler
+ * \param user not used by this function
+ *
+ * NOTE(review): strcat() is called below although this file does not
+ * include <string.h> itself; presumably a project header pulls it in --
+ * confirm.
+ */
+void r300FragmentProgramDump(struct radeon_compiler *c, void *user)
+{
+       struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)c;
+       struct r300_fragment_program_code *code = &compiler->code->code.r300;
+       int n, i, j;
+       /* Running number of dumps done so far; only printed in the banner line. */
+       static int pc = 0;
+
+       fprintf(stderr, "pc=%d*************************************\n", pc++);
+
+       fprintf(stderr, "Hardware program\n");
+       fprintf(stderr, "----------------\n");
+       if (c->is_r400) {
+               fprintf(stderr, "code_offset_ext: %08x\n", code->r400_code_offset_ext);
+       }
+
+       /* (config & 3) is the index of the last node; the code_addr[] entries
+        * in use are the ones ending at index 3. */
+       for (n = 0; n <= (code->config & 3); n++) {
+               uint32_t code_addr = code->code_addr[3 - (code->config & 3) + n];
+               /* ALU start and size are each extended by three bits taken from
+                * r400_code_offset_ext, placed at bit 6 and above. */
+               unsigned int alu_offset = ((code_addr & R300_ALU_START_MASK) >> R300_ALU_START_SHIFT) +
+                               (((code->r400_code_offset_ext >> (24 - (n * 6))) & 0x7) << 6);
+               unsigned int alu_end = ((code_addr & R300_ALU_SIZE_MASK) >> R300_ALU_SIZE_SHIFT) +
+                               (((code->r400_code_offset_ext >> (27 - (n * 6))) & 0x7) << 6);
+               int tex_offset = (code_addr & R300_TEX_START_MASK) >> R300_TEX_START_SHIFT;
+               int tex_end = (code_addr & R300_TEX_SIZE_MASK) >> R300_TEX_SIZE_SHIFT;
+
+               fprintf(stderr, "NODE %d: alu_offset: %u, tex_offset: %d, "
+                       "alu_end: %u, tex_end: %d  (code_addr: %08x)\n", n,
+                       alu_offset, tex_offset, alu_end, tex_end, code_addr);
+
+               /* The first node's TEX instructions are only listed when the
+                * config flag is set; later nodes always list them. */
+               if (n > 0 || (code->config & R300_PFS_CNTL_FIRST_NODE_HAS_TEX)) {
+                       fprintf(stderr, "  TEX:\n");
+                       /* tex_end is used as the inclusive last offset relative
+                        * to tex_offset. */
+                       for (i = tex_offset;
+                            i <= tex_offset + tex_end;
+                            ++i) {
+                               const char *instr;
+
+                               switch ((code->tex.
+                                        inst[i] >> R300_TEX_INST_SHIFT) &
+                                       15) {
+                               case R300_TEX_OP_LD:
+                                       instr = "TEX";
+                                       break;
+                               case R300_TEX_OP_KIL:
+                                       instr = "KIL";
+                                       break;
+                               case R300_TEX_OP_TXP:
+                                       instr = "TXP";
+                                       break;
+                               case R300_TEX_OP_TXB:
+                                       instr = "TXB";
+                                       break;
+                               default:
+                                       instr = "UNKNOWN";
+                               }
+
+                               fprintf(stderr,
+                                       "    %s t%i, %c%i, texture[%i]   (%08x)\n",
+                                       instr,
+                                       (code->tex.
+                                        inst[i] >> R300_DST_ADDR_SHIFT) & 31,
+                                       't',
+                                       (code->tex.
+                                        inst[i] >> R300_SRC_ADDR_SHIFT) & 31,
+                                       (code->tex.
+                                        inst[i] & R300_TEX_ID_MASK) >>
+                                       R300_TEX_ID_SHIFT,
+                                       code->tex.inst[i]);
+                       }
+               }
+
+               /* Like the TEX range, the ALU range is walked inclusively. */
+               for (i = alu_offset;
+                    i <= alu_offset + alu_end; ++i) {
+                       /* srcc/srca: RGB/alpha source names, slot 3 holds the
+                        * presubtract name; dstc/dsta: destination strings;
+                        * argc/arga: decoded argument strings. */
+                       char srcc[4][10], dstc[20];
+                       char srca[4][10], dsta[20];
+                       char argc[3][20];
+                       char arga[3][20];
+                       char flags[5], tmp[10];
+
+                       /* Each source address takes 6 bits: bit 5 selects 'c' or
+                        * 't', bits 0-4 are the index, and the R400 extension
+                        * word supplies one more index bit. */
+                       for (j = 0; j < 3; ++j) {
+                               int regc = code->alu.inst[i].rgb_addr >> (j * 6);
+                               int rega = code->alu.inst[i].alpha_addr >> (j * 6);
+                               int msbc = get_msb(R400_ADDR_EXT_RGB_MSB_BIT(j),
+                                       code->alu.inst[i].r400_ext_addr);
+                               int msba = get_msb(R400_ADDR_EXT_A_MSB_BIT(j),
+                                       code->alu.inst[i].r400_ext_addr);
+
+                               sprintf(srcc[j], "%c%i",
+                                       (regc & 32) ? 'c' : 't', (regc & 31) | msbc);
+                               sprintf(srca[j], "%c%i",
+                                       (rega & 32) ? 'c' : 't', (rega & 31) | msba);
+                       }
+
+                       /* RGB destination, part 1: register write mask (x/y/z). */
+                       dstc[0] = 0;
+                       sprintf(flags, "%s%s%s",
+                               (code->alu.inst[i].
+                                rgb_addr & R300_ALU_DSTC_REG_X) ? "x" : "",
+                               (code->alu.inst[i].
+                                rgb_addr & R300_ALU_DSTC_REG_Y) ? "y" : "",
+                               (code->alu.inst[i].
+                                rgb_addr & R300_ALU_DSTC_REG_Z) ? "z" : "");
+                       if (flags[0] != 0) {
+                               unsigned int msb = get_msb(
+                                       R400_ADDRD_EXT_RGB_MSB_BIT,
+                                       code->alu.inst[i].r400_ext_addr);
+
+                               sprintf(dstc, "t%i.%s ",
+                                       ((code->alu.inst[i].
+                                        rgb_addr >> R300_ALU_DSTC_SHIFT)
+                                        & 31) | msb,
+                                       flags);
+                       }
+                       /* RGB destination, part 2: output write mask; the output
+                        * index is read from bits 29-30 of rgb_addr. */
+                       sprintf(flags, "%s%s%s",
+                               (code->alu.inst[i].
+                                rgb_addr & R300_ALU_DSTC_OUTPUT_X) ? "x" : "",
+                               (code->alu.inst[i].
+                                rgb_addr & R300_ALU_DSTC_OUTPUT_Y) ? "y" : "",
+                               (code->alu.inst[i].
+                                rgb_addr & R300_ALU_DSTC_OUTPUT_Z) ? "z" : "");
+                       if (flags[0] != 0) {
+                               sprintf(tmp, "o%i.%s",
+                                       (code->alu.inst[i].
+                                        rgb_addr >> 29) & 3,
+                                       flags);
+                               strcat(dstc, tmp);
+                       }
+                       /* Presub */
+                       presub_string(srcc[3], code->alu.inst[i].rgb_inst);
+                       presub_string(srca[3], code->alu.inst[i].alpha_inst);
+
+                       /* Alpha destination: register, output (index in bits
+                        * 25-26 of alpha_addr) and/or depth ("Z"). */
+                       dsta[0] = 0;
+                       if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_REG) {
+                               unsigned int msb = get_msb(
+                                       R400_ADDRD_EXT_A_MSB_BIT,
+                                       code->alu.inst[i].r400_ext_addr);
+                               sprintf(dsta, "t%i.w ",
+                                       ((code->alu.inst[i].
+                                        alpha_addr >> R300_ALU_DSTA_SHIFT) & 31)
+                                        | msb);
+                       }
+                       if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_OUTPUT) {
+                               sprintf(tmp, "o%i.w ",
+                                       (code->alu.inst[i].
+                                        alpha_addr >> 25) & 3);
+                               strcat(dsta, tmp);
+                       }
+                       if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_DEPTH) {
+                               strcat(dsta, "Z");
+                       }
+
+                       fprintf(stderr,
+                               "%3i: xyz: %3s %3s %3s %5s-> %-20s (%08x)\n"
+                               "       w: %3s %3s %3s %5s-> %-20s (%08x)\n", i,
+                               srcc[0], srcc[1], srcc[2], srcc[3], dstc,
+                               code->alu.inst[i].rgb_addr, srca[0], srca[1],
+                               srca[2], srca[3], dsta,
+                               code->alu.inst[i].alpha_addr);
+
+                       /* Each argument selector takes 7 bits of the instruction
+                        * word: bits 0-4 pick the source/swizzle, bit 5 prints a
+                        * leading '-', bit 6 wraps the operand in '|'. */
+                       for (j = 0; j < 3; ++j) {
+                               int regc = code->alu.inst[i].rgb_inst >> (j * 7);
+                               int rega = code->alu.inst[i].alpha_inst >> (j * 7);
+                               int d;
+                               char buf[20];
+
+                               /* RGB argument selector. */
+                               d = regc & 31;
+                               if (d < 12) {
+                                       switch (d % 4) {
+                                       case R300_ALU_ARGC_SRC0C_XYZ:
+                                               sprintf(buf, "%s.xyz",
+                                                       srcc[d / 4]);
+                                               break;
+                                       case R300_ALU_ARGC_SRC0C_XXX:
+                                               sprintf(buf, "%s.xxx",
+                                                       srcc[d / 4]);
+                                               break;
+                                       case R300_ALU_ARGC_SRC0C_YYY:
+                                               sprintf(buf, "%s.yyy",
+                                                       srcc[d / 4]);
+                                               break;
+                                       case R300_ALU_ARGC_SRC0C_ZZZ:
+                                               sprintf(buf, "%s.zzz",
+                                                       srcc[d / 4]);
+                                               break;
+                                       }
+                               } else if (d < 15) {
+                                       sprintf(buf, "%s.www", srca[d - 12]);
+                               } else if (d < 20 ) {
+                                       switch(d) {
+                                       case R300_ALU_ARGC_SRCP_XYZ:
+                                               sprintf(buf, "srcp.xyz");
+                                               break;
+                                       case R300_ALU_ARGC_SRCP_XXX:
+                                               sprintf(buf, "srcp.xxx");
+                                               break;
+                                       case R300_ALU_ARGC_SRCP_YYY:
+                                               sprintf(buf, "srcp.yyy");
+                                               break;
+                                       case R300_ALU_ARGC_SRCP_ZZZ:
+                                               sprintf(buf, "srcp.zzz");
+                                               break;
+                                       case R300_ALU_ARGC_SRCP_WWW:
+                                               sprintf(buf, "srcp.www");
+                                               break;
+                                       }
+                               } else if (d == 20) {
+                                       sprintf(buf, "0.0");
+                               } else if (d == 21) {
+                                       sprintf(buf, "1.0");
+                               } else if (d == 22) {
+                                       sprintf(buf, "0.5");
+                               } else if (d >= 23 && d < 32) {
+                                       /* Selectors 23-31: three swizzle groups of
+                                        * three sources each. */
+                                       d -= 23;
+                                       switch (d / 3) {
+                                       case 0:
+                                               sprintf(buf, "%s.yzx",
+                                                       srcc[d % 3]);
+                                               break;
+                                       case 1:
+                                               sprintf(buf, "%s.zxy",
+                                                       srcc[d % 3]);
+                                               break;
+                                       case 2:
+                                               sprintf(buf, "%s.Wzy",
+                                                       srcc[d % 3]);
+                                               break;
+                                       }
+                               } else {
+                                       /* Not reachable: d was masked with 31 above. */
+                                       sprintf(buf, "%i", d);
+                               }
+
+                               sprintf(argc[j], "%s%s%s%s",
+                                       (regc & 32) ? "-" : "",
+                                       (regc & 64) ? "|" : "",
+                                       buf, (regc & 64) ? "|" : "");
+
+                               /* Alpha argument selector. */
+                               d = rega & 31;
+                               if (d < 9) {
+                                       sprintf(buf, "%s.%c", srcc[d / 3],
+                                               'x' + (char)(d % 3));
+                               } else if (d < 12) {
+                                       sprintf(buf, "%s.w", srca[d - 9]);
+                               } else if (d < 16) {
+                                       switch(d) {
+                                       case R300_ALU_ARGA_SRCP_X:
+                                               sprintf(buf, "srcp.x");
+                                               break;
+                                       case R300_ALU_ARGA_SRCP_Y:
+                                               sprintf(buf, "srcp.y");
+                                               break;
+                                       case R300_ALU_ARGA_SRCP_Z:
+                                               sprintf(buf, "srcp.z");
+                                               break;
+                                       case R300_ALU_ARGA_SRCP_W:
+                                               sprintf(buf, "srcp.w");
+                                               break;
+                                       }
+                               } else if (d == 16) {
+                                       sprintf(buf, "0.0");
+                               } else if (d == 17) {
+                                       sprintf(buf, "1.0");
+                               } else if (d == 18) {
+                                       sprintf(buf, "0.5");
+                               } else {
+                                       /* Selectors 19-31 are printed as raw numbers. */
+                                       sprintf(buf, "%i", d);
+                               }
+
+                               sprintf(arga[j], "%s%s%s%s",
+                                       (rega & 32) ? "-" : "",
+                                       (rega & 64) ? "|" : "",
+                                       buf, (rega & 64) ? "|" : "");
+                       }
+
+                       fprintf(stderr, "     xyz: %8s %8s %8s    op: %08x %s\n"
+                               "       w: %8s %8s %8s    op: %08x\n",
+                               argc[0], argc[1], argc[2],
+                               code->alu.inst[i].rgb_inst,
+                               code->alu.inst[i].rgb_inst & R300_ALU_INSERT_NOP ?
+                               "NOP" : "",
+                               arga[0], arga[1],arga[2],
+                               code->alu.inst[i].alpha_inst);
+               }
+       }
+}
diff --git a/src/gallium/drivers/r300/compiler/r300_fragprog.h b/src/gallium/drivers/r300/compiler/r300_fragprog.h
new file mode 100644 (file)
index 0000000..0c88bab
--- /dev/null
@@ -0,0 +1,44 @@
+/*
+ * Copyright (C) 2005 Ben Skeggs.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/*
+ * Authors:
+ *   Ben Skeggs <darktama@iinet.net.au>
+ *   Jerome Glisse <j.glisse@gmail.com>
+ */
+#ifndef __R300_FRAGPROG_H_
+#define __R300_FRAGPROG_H_
+
+#include "radeon_compiler.h"
+#include "radeon_program.h"
+
+
+/* Final compilation step: emit the r300 hardware code for the program in c. */
+extern void r300BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user);
+
+/* Print the emitted fragment program code to stderr (debugging aid). */
+extern void r300FragmentProgramDump(struct radeon_compiler *c, void *user);
+
+#endif
diff --git a/src/gallium/drivers/r300/compiler/r300_fragprog_emit.c b/src/gallium/drivers/r300/compiler/r300_fragprog_emit.c
new file mode 100644 (file)
index 0000000..e6fd1fd
--- /dev/null
@@ -0,0 +1,536 @@
+/*
+ * Copyright (C) 2005 Ben Skeggs.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/**
+ * \file
+ *
+ * Emit the r300_fragment_program_code that can be understood by the hardware.
+ * Input is a pre-transformed radeon_program.
+ *
+ * \author Ben Skeggs <darktama@iinet.net.au>
+ *
+ * \author Jerome Glisse <j.glisse@gmail.com>
+ */
+
+#include "r300_fragprog.h"
+
+#include "../r300_reg.h"
+
+#include "radeon_program_pair.h"
+#include "r300_fragprog_swizzle.h"
+
+
+/**
+ * Bookkeeping carried through the emission of one fragment program.
+ *
+ * NOTE(review): node_first_tex/node_first_alu are 8-bit fields, so start
+ * offsets of 256 or more would be truncated -- confirm the instruction
+ * limits in use never reach that.
+ */
+struct r300_emit_state {
+       struct r300_fragment_program_compiler * compiler;
+
+       unsigned current_node : 2; /**< index of the node currently being filled */
+       unsigned node_first_tex : 8; /**< code->tex.length when the node was begun */
+       unsigned node_first_alu : 8; /**< code->alu.length when the node was begun */
+       uint32_t node_flags; /**< flag bits ORed into the node's code_addr word */
+};
+
+/**
+ * Declare the locals \c c (the fragment program compiler) and \c code (its
+ * r300 code block), derived from a local \c emit pointer.
+ */
+#define PROG_CODE \
+       struct r300_fragment_program_compiler *c = emit->compiler; \
+       struct r300_fragment_program_code *code = &c->code->code.r300
+
+/**
+ * Report a compile error through rc_error(), prefixed with the current file
+ * and function name.  Requires a local \c c in scope and relies on the GNU
+ * named variadic macro extension (args...).
+ */
+#define error(fmt, args...) do {                       \
+               rc_error(&c->Base, "%s::%s(): " fmt "\n",       \
+                       __FILE__, __FUNCTION__, ##args);        \
+       } while(0)
+
+/**
+ * Return bits 6..8 of an ALU offset or size, i.e. the three bits that lie
+ * directly above the low 6 bits.
+ */
+static unsigned int get_msbs_alu(unsigned int bits)
+{
+       const unsigned int upper = bits >> 6;
+
+       return upper & 0x7;
+}
+
+/**
+ * Return the extended (most significant) bits of a texture instruction
+ * offset or size: the four bits directly above the low \p lsbs bits.
+ *
+ * @param bits The full offset or size value
+ * @param lsbs The number of least significant bits
+ */
+static unsigned int get_msbs_tex(unsigned int bits, unsigned int lsbs)
+{
+       /* Use a contiguous 4-bit mask.  The previous 0x15 (binary 10101)
+        * dropped bits 1 and 3 of the field and let bit 4 through. */
+       return (bits >> lsbs) & 0xf;
+}
+
+/* Extract the bits of x above the low lsbs bits, limited by mask.  Every
+ * argument is parenthesized so that compound expressions expand safely. */
+#define R400_EXT_GET_MSBS(x, lsbs, mask) (((x) >> (lsbs)) & (mask))
+
+/**
+ * Record that temporary register \p index is in use by raising
+ * code->pixsize to it when it is larger than the current value.
+ */
+static void use_temporary(struct r300_fragment_program_code *code, unsigned int index)
+{
+       if (code->pixsize >= index)
+               return;
+
+       code->pixsize = index;
+}
+
+/**
+ * Compute the source address field for one pair-instruction source.
+ *
+ * Constants yield their index with bit 5 set.  Temporaries and inputs are
+ * recorded through use_temporary() and yield their index masked to 5 bits.
+ * Unused sources and any other register file yield 0.
+ */
+static unsigned int use_source(struct r300_fragment_program_code* code, struct rc_pair_instruction_source src)
+{
+       unsigned int encoded = 0;
+
+       if (src.Used) {
+               switch(src.File) {
+               case RC_FILE_CONSTANT:
+                       encoded = src.Index | (1 << 5);
+                       break;
+               case RC_FILE_TEMPORARY:
+               case RC_FILE_INPUT:
+                       use_temporary(code, src.Index);
+                       encoded = src.Index & 0x1f;
+                       break;
+               default:
+                       break;
+               }
+       }
+
+       return encoded;
+}
+
+
+/**
+ * Map a compiler opcode to the hardware opcode of the RGB half of a paired
+ * ALU instruction.  NOP is emitted as MAD; an unknown opcode is reported
+ * as an error and then also emitted as MAD.
+ */
+static unsigned int translate_rgb_opcode(struct r300_fragment_program_compiler * c, rc_opcode opcode)
+{
+       switch(opcode) {
+       case RC_OPCODE_NOP:
+       case RC_OPCODE_MAD:
+               return R300_ALU_OUTC_MAD;
+       case RC_OPCODE_CMP:
+               return R300_ALU_OUTC_CMP;
+       case RC_OPCODE_CND:
+               return R300_ALU_OUTC_CND;
+       case RC_OPCODE_DP3:
+               return R300_ALU_OUTC_DP3;
+       case RC_OPCODE_DP4:
+               return R300_ALU_OUTC_DP4;
+       case RC_OPCODE_FRC:
+               return R300_ALU_OUTC_FRC;
+       case RC_OPCODE_MAX:
+               return R300_ALU_OUTC_MAX;
+       case RC_OPCODE_MIN:
+               return R300_ALU_OUTC_MIN;
+       case RC_OPCODE_REPL_ALPHA:
+               return R300_ALU_OUTC_REPL_ALPHA;
+       default:
+               break;
+       }
+
+       error("translate_rgb_opcode: Unknown opcode %s", rc_get_opcode_info(opcode)->Name);
+       return R300_ALU_OUTC_MAD;
+}
+
+/**
+ * Map a compiler opcode to the hardware opcode of the alpha half of a
+ * paired ALU instruction.  NOP is emitted as MAD; an unknown opcode is
+ * reported as an error and then also emitted as MAD.
+ */
+static unsigned int translate_alpha_opcode(struct r300_fragment_program_compiler * c, rc_opcode opcode)
+{
+       switch(opcode) {
+       case RC_OPCODE_CMP: return R300_ALU_OUTA_CMP;
+       case RC_OPCODE_CND: return R300_ALU_OUTA_CND;
+       /* NOTE(review): DP3 maps to the DP4 alpha opcode; presumably the
+        * alpha unit has no separate DP3 encoding -- confirm. */
+       case RC_OPCODE_DP3: return R300_ALU_OUTA_DP4;
+       case RC_OPCODE_DP4: return R300_ALU_OUTA_DP4;
+       case RC_OPCODE_EX2: return R300_ALU_OUTA_EX2;
+       case RC_OPCODE_FRC: return R300_ALU_OUTA_FRC;
+       case RC_OPCODE_LG2: return R300_ALU_OUTA_LG2;
+       default:
+               /* Name this function in the message (it used to say
+                * translate_rgb_opcode). */
+               error("translate_alpha_opcode: Unknown opcode %s", rc_get_opcode_info(opcode)->Name);
+               /* fall through */
+       case RC_OPCODE_NOP:
+               /* fall through */
+       case RC_OPCODE_MAD: return R300_ALU_OUTA_MAD;
+       case RC_OPCODE_MAX: return R300_ALU_OUTA_MAX;
+       case RC_OPCODE_MIN: return R300_ALU_OUTA_MIN;
+       case RC_OPCODE_RCP: return R300_ALU_OUTA_RCP;
+       case RC_OPCODE_RSQ: return R300_ALU_OUTA_RSQ;
+       }
+}
+
+/**
+ * Emit one paired ALU instruction.
+ *
+ * Appends a new entry to code->alu.inst and fills its RGB/alpha opcode,
+ * source address, argument, presubtract, saturate and destination fields
+ * from \p inst.
+ *
+ * The address and r400 extension words are only ORed into, so the entry is
+ * expected to be zero beforehand (r300BuildFragmentProgramHwCode() clears
+ * the whole code block before emitting).
+ *
+ * @return 1 on success, 0 (after reporting an error) when the program
+ * already holds c->Base.max_alu_insts ALU instructions.
+ */
+static int emit_alu(struct r300_emit_state * emit, struct rc_pair_instruction* inst)
+{
+       int ip;
+       int j;
+       PROG_CODE;
+
+       if (code->alu.length >= c->Base.max_alu_insts) {
+               error("Too many ALU instructions");
+               return 0;
+       }
+
+       /* Claim the next free instruction slot. */
+       ip = code->alu.length++;
+
+       code->alu.inst[ip].rgb_inst = translate_rgb_opcode(c, inst->RGB.Opcode);
+       code->alu.inst[ip].alpha_inst = translate_alpha_opcode(c, inst->Alpha.Opcode);
+
+       /* Sources 0..2: each address occupies a 6-bit slot and each argument
+        * a 7-bit slot in the respective word. */
+       for(j = 0; j < 3; ++j) {
+               /* Set the RGB address */
+               unsigned int src = use_source(code, inst->RGB.Src[j]);
+               unsigned int arg;
+               /* Indices at or above R300_PFS_NUM_TEMP_REGS additionally set
+                * the r400 extended address bit for this source. */
+               if (inst->RGB.Src[j].Index >= R300_PFS_NUM_TEMP_REGS)
+                       code->alu.inst[ip].r400_ext_addr |= R400_ADDR_EXT_RGB_MSB_BIT(j);
+
+               code->alu.inst[ip].rgb_addr |= src << (6*j);
+
+               /* Set the Alpha address */
+               src = use_source(code, inst->Alpha.Src[j]);
+               if (inst->Alpha.Src[j].Index >= R300_PFS_NUM_TEMP_REGS)
+                       code->alu.inst[ip].r400_ext_addr |= R400_ADDR_EXT_A_MSB_BIT(j);
+
+               code->alu.inst[ip].alpha_addr |= src << (6*j);
+
+               /* Argument encoding: swizzle code in the low bits, Negate in
+                * bit 5 and Abs in bit 6 of the slot. */
+               arg = r300FPTranslateRGBSwizzle(inst->RGB.Arg[j].Source, inst->RGB.Arg[j].Swizzle);
+               arg |= inst->RGB.Arg[j].Abs << 6;
+               arg |= inst->RGB.Arg[j].Negate << 5;
+               code->alu.inst[ip].rgb_inst |= arg << (7*j);
+
+               arg = r300FPTranslateAlphaSwizzle(inst->Alpha.Arg[j].Source, inst->Alpha.Arg[j].Swizzle);
+               arg |= inst->Alpha.Arg[j].Abs << 6;
+               arg |= inst->Alpha.Arg[j].Negate << 5;
+               code->alu.inst[ip].alpha_inst |= arg << (7*j);
+       }
+
+       /* Presubtract */
+       /* The presubtract source's Index carries the RC_PRESUB_* operation;
+        * unrecognized values leave the instruction word unchanged. */
+       if (inst->RGB.Src[RC_PAIR_PRESUB_SRC].Used) {
+               switch(inst->RGB.Src[RC_PAIR_PRESUB_SRC].Index) {
+               case RC_PRESUB_BIAS:
+                       code->alu.inst[ip].rgb_inst |=
+                                               R300_ALU_SRCP_1_MINUS_2_SRC0;
+                       break;
+               case RC_PRESUB_ADD:
+                       code->alu.inst[ip].rgb_inst |=
+                                               R300_ALU_SRCP_SRC1_PLUS_SRC0;
+                       break;
+               case RC_PRESUB_SUB:
+                       code->alu.inst[ip].rgb_inst |=
+                                               R300_ALU_SRCP_SRC1_MINUS_SRC0;
+                       break;
+               case RC_PRESUB_INV:
+                       code->alu.inst[ip].rgb_inst |=
+                                               R300_ALU_SRCP_1_MINUS_SRC0;
+                       break;
+               default:
+                       break;
+               }
+       }
+
+       if (inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) {
+               switch(inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Index) {
+               case RC_PRESUB_BIAS:
+                       code->alu.inst[ip].alpha_inst |=
+                                               R300_ALU_SRCP_1_MINUS_2_SRC0;
+                       break;
+               case RC_PRESUB_ADD:
+                       code->alu.inst[ip].alpha_inst |=
+                                               R300_ALU_SRCP_SRC1_PLUS_SRC0;
+                       break;
+               case RC_PRESUB_SUB:
+                       code->alu.inst[ip].alpha_inst |=
+                                               R300_ALU_SRCP_SRC1_MINUS_SRC0;
+                       break;
+               case RC_PRESUB_INV:
+                       code->alu.inst[ip].alpha_inst |=
+                                               R300_ALU_SRCP_1_MINUS_SRC0;
+                       break;
+               default:
+                       break;
+               }
+       }
+
+       if (inst->RGB.Saturate)
+               code->alu.inst[ip].rgb_inst |= R300_ALU_OUTC_CLAMP;
+       if (inst->Alpha.Saturate)
+               code->alu.inst[ip].alpha_inst |= R300_ALU_OUTA_CLAMP;
+
+       /* RGB destination: temporary register write and/or output write. */
+       if (inst->RGB.WriteMask) {
+               use_temporary(code, inst->RGB.DestIndex);
+               if (inst->RGB.DestIndex >= R300_PFS_NUM_TEMP_REGS)
+                       code->alu.inst[ip].r400_ext_addr |= R400_ADDRD_EXT_RGB_MSB_BIT;
+               code->alu.inst[ip].rgb_addr |=
+                       ((inst->RGB.DestIndex & 0x1f) << R300_ALU_DSTC_SHIFT) |
+                       (inst->RGB.WriteMask << R300_ALU_DSTC_REG_MASK_SHIFT);
+       }
+       if (inst->RGB.OutputWriteMask) {
+               code->alu.inst[ip].rgb_addr |=
+            (inst->RGB.OutputWriteMask << R300_ALU_DSTC_OUTPUT_MASK_SHIFT) |
+            R300_RGB_TARGET(inst->RGB.Target);
+               emit->node_flags |= R300_RGBA_OUT;
+       }
+
+       /* Alpha destination: temporary register, color output and/or depth. */
+       if (inst->Alpha.WriteMask) {
+               use_temporary(code, inst->Alpha.DestIndex);
+               if (inst->Alpha.DestIndex >= R300_PFS_NUM_TEMP_REGS)
+                       code->alu.inst[ip].r400_ext_addr |= R400_ADDRD_EXT_A_MSB_BIT;
+               code->alu.inst[ip].alpha_addr |=
+                       ((inst->Alpha.DestIndex & 0x1f) << R300_ALU_DSTA_SHIFT) |
+                       R300_ALU_DSTA_REG;
+       }
+       if (inst->Alpha.OutputWriteMask) {
+               code->alu.inst[ip].alpha_addr |= R300_ALU_DSTA_OUTPUT |
+            R300_ALPHA_TARGET(inst->Alpha.Target);
+               emit->node_flags |= R300_RGBA_OUT;
+       }
+       if (inst->Alpha.DepthWriteMask) {
+               code->alu.inst[ip].alpha_addr |= R300_ALU_DSTA_DEPTH;
+               emit->node_flags |= R300_W_OUT;
+               c->code->writes_depth = 1;
+       }
+       if (inst->Nop)
+               code->alu.inst[ip].rgb_inst |= R300_ALU_INSERT_NOP;
+
+       return 1;
+}
+
+
+/**
+ * Finish the current node without advancing to the next one.
+ *
+ * Makes sure the node holds at least one ALU instruction (emitting an
+ * all-zero pair instruction if necessary), then writes the node's
+ * code_addr word and its r400 extended ALU offset/size bits.
+ *
+ * @return 1 on success, 0 after an error was reported (the padding ALU
+ * instruction could not be emitted, or a node other than node 0 contains
+ * no TEX instructions).
+ */
+static int finish_node(struct r300_emit_state * emit)
+{
+       struct r300_fragment_program_compiler * c = emit->compiler;
+       struct r300_fragment_program_code *code = &emit->compiler->code->code.r300;
+       unsigned alu_offset;
+       unsigned alu_end;
+       unsigned tex_offset;
+       unsigned tex_end;
+
+       unsigned int alu_offset_msbs, alu_end_msbs;
+
+       if (code->alu.length == emit->node_first_alu) {
+               /* Generate a single NOP for this node */
+               struct rc_pair_instruction inst;
+               memset(&inst, 0, sizeof(inst));
+               if (!emit_alu(emit, &inst))
+                       return 0;
+       }
+
+       /* The "end" values are stored as (instruction count - 1).  For a node
+        * without TEX instructions the unsigned subtraction wraps around;
+        * tex_end is reset to 0 for that case below. */
+       alu_offset = emit->node_first_alu;
+       alu_end = code->alu.length - alu_offset - 1;
+       tex_offset = emit->node_first_tex;
+       tex_end = code->tex.length - tex_offset - 1;
+
+       if (code->tex.length == emit->node_first_tex) {
+               /* Only node 0 may be free of TEX instructions. */
+               if (emit->current_node > 0) {
+                       error("Node %i has no TEX instructions", emit->current_node);
+                       return 0;
+               }
+
+               tex_end = 0;
+       } else {
+               if (emit->current_node == 0)
+                       code->config |= R300_PFS_CNTL_FIRST_NODE_HAS_TEX;
+       }
+
+       /* Write the config register.
+        * Note: The order in which the words for each node are written
+        * is not correct here and needs to be fixed up once we're entirely
+        * done
+        *
+        * Also note that the register specification from AMD is slightly
+        * incorrect in its description of this register. */
+       code->code_addr[emit->current_node]  =
+                       ((alu_offset << R300_ALU_START_SHIFT)
+                               & R300_ALU_START_MASK)
+                       | ((alu_end << R300_ALU_SIZE_SHIFT)
+                               & R300_ALU_SIZE_MASK)
+                       | ((tex_offset << R300_TEX_START_SHIFT)
+                               & R300_TEX_START_MASK)
+                       | ((tex_end << R300_TEX_SIZE_SHIFT)
+                               & R300_TEX_SIZE_MASK)
+                       | emit->node_flags
+                       | (get_msbs_tex(tex_offset, 5)
+                               << R400_TEX_START_MSB_SHIFT)
+                       | (get_msbs_tex(tex_end, 5)
+                               << R400_TEX_SIZE_MSB_SHIFT)
+                       ;
+
+       /* Write r400 extended instruction fields.  These will be ignored on
+        * r300 cards.  */
+       alu_offset_msbs = get_msbs_alu(alu_offset);
+       alu_end_msbs = get_msbs_alu(alu_end);
+       /* Node N uses the START(3-N)/SIZE(3-N) fields of the extension word. */
+       switch(emit->current_node) {
+       case 0:
+               code->r400_code_offset_ext |=
+                       alu_offset_msbs << R400_ALU_START3_MSB_SHIFT
+                       | alu_end_msbs << R400_ALU_SIZE3_MSB_SHIFT;
+               break;
+       case 1:
+               code->r400_code_offset_ext |=
+                       alu_offset_msbs << R400_ALU_START2_MSB_SHIFT
+                       | alu_end_msbs << R400_ALU_SIZE2_MSB_SHIFT;
+               break;
+       case 2:
+               code->r400_code_offset_ext |=
+                       alu_offset_msbs << R400_ALU_START1_MSB_SHIFT
+                       | alu_end_msbs << R400_ALU_SIZE1_MSB_SHIFT;
+               break;
+       case 3:
+               code->r400_code_offset_ext |=
+                       alu_offset_msbs << R400_ALU_START0_MSB_SHIFT
+                       | alu_end_msbs << R400_ALU_SIZE0_MSB_SHIFT;
+               break;
+       }
+       return 1;
+}
+
+
+/**
+ * Begin a block of texture instructions.
+ *
+ * When the current node already contains instructions it is finished and a
+ * new node is opened (one more texture indirection); an empty node is
+ * simply reused.
+ *
+ * @return 1 on success, 0 when the last node (index 3) is already in use
+ * or the current node could not be finished.
+ */
+static int begin_tex(struct r300_emit_state * emit)
+{
+       struct r300_fragment_program_compiler *c = emit->compiler;
+       struct r300_fragment_program_code *code = &c->code->code.r300;
+       const int node_is_empty =
+               code->alu.length == emit->node_first_alu &&
+               code->tex.length == emit->node_first_tex;
+
+       /* Nothing was emitted into this node yet: keep using it. */
+       if (node_is_empty)
+               return 1;
+
+       if (emit->current_node == 3) {
+               error("Too many texture indirections");
+               return 0;
+       }
+
+       if (finish_node(emit)) {
+               /* Open the next node at the current end of both streams. */
+               emit->current_node++;
+               emit->node_first_tex = code->tex.length;
+               emit->node_first_alu = code->alu.length;
+               emit->node_flags = 0;
+               return 1;
+       }
+
+       return 0;
+}
+
+
+/**
+ * Emit one texture instruction (KIL, TEX, TXB or TXP) into code->tex.inst.
+ *
+ * @return 1 on success, 0 after reporting an error when the TEX instruction
+ * limit has been reached or the opcode is not one of the four above.
+ */
+static int emit_tex(struct r300_emit_state * emit, struct rc_instruction * inst)
+{
+       unsigned int unit;
+       unsigned int dest;
+       unsigned int opcode;
+       PROG_CODE;
+
+       if (code->tex.length >= emit->compiler->Base.max_tex_insts) {
+               error("Too many TEX instructions");
+               return 0;
+       }
+
+       unit = inst->U.I.TexSrcUnit;
+       dest = inst->U.I.DstReg.Index;
+
+       switch(inst->U.I.Opcode) {
+       case RC_OPCODE_KIL: opcode = R300_TEX_OP_KIL; break;
+       case RC_OPCODE_TEX: opcode = R300_TEX_OP_LD; break;
+       case RC_OPCODE_TXB: opcode = R300_TEX_OP_TXB; break;
+       case RC_OPCODE_TXP: opcode = R300_TEX_OP_TXP; break;
+       default:
+               error("Unknown texture opcode %s", rc_get_opcode_info(inst->U.I.Opcode)->Name);
+               return 0;
+       }
+
+       /* For KIL the unit and destination fields are forced to 0 and no
+        * destination temporary is recorded. */
+       if (inst->U.I.Opcode == RC_OPCODE_KIL) {
+               unit = 0;
+               dest = 0;
+       } else {
+               use_temporary(code, dest);
+       }
+
+       use_temporary(code, inst->U.I.SrcReg[0].Index);
+
+       /* Pack the instruction word.  Register indices at or above
+        * R300_PFS_NUM_TEMP_REGS additionally set the r400 extension bit for
+        * the respective address. */
+       code->tex.inst[code->tex.length++] =
+               ((inst->U.I.SrcReg[0].Index << R300_SRC_ADDR_SHIFT)
+                       & R300_SRC_ADDR_MASK)
+               | ((dest << R300_DST_ADDR_SHIFT)
+                       & R300_DST_ADDR_MASK)
+               | (unit << R300_TEX_ID_SHIFT)
+               | (opcode << R300_TEX_INST_SHIFT)
+               | (inst->U.I.SrcReg[0].Index >= R300_PFS_NUM_TEMP_REGS ?
+                       R400_SRC_ADDR_EXT_BIT : 0)
+               | (dest >= R300_PFS_NUM_TEMP_REGS ?
+                       R400_DST_ADDR_EXT_BIT : 0)
+               ;
+       return 1;
+}
+
+
+/**
+ * Final compilation step: Turn the intermediate radeon_program into
+ * machine-readable instructions.
+ *
+ * Clears the r300 code block, walks the instruction list emitting TEX and
+ * paired ALU instructions, finishes the last node and fills in the global
+ * config/offset words.  When an error has been reported the function
+ * returns early and leaves the code block only partially filled.
+ *
+ * @param c    the compiler; cast to struct r300_fragment_program_compiler
+ * @param user unused
+ */
+void r300BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user)
+{
+       struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)c;
+       struct r300_emit_state emit;
+       struct r300_fragment_program_code *code = &compiler->code->code.r300;
+       unsigned int tex_end;
+
+       memset(&emit, 0, sizeof(emit));
+       emit.compiler = compiler;
+
+       memset(code, 0, sizeof(struct r300_fragment_program_code));
+
+       /* The emit helpers' return values are not checked here; the loop
+        * stops once compiler->Base.Error is set (presumably by rc_error()
+        * inside the helpers -- confirm). */
+       for(struct rc_instruction * inst = compiler->Base.Program.Instructions.Next;
+           inst != &compiler->Base.Program.Instructions && !compiler->Base.Error;
+           inst = inst->Next) {
+               if (inst->Type == RC_INSTRUCTION_NORMAL) {
+                       if (inst->U.I.Opcode == RC_OPCODE_BEGIN_TEX) {
+                               begin_tex(&emit);
+                               continue;
+                       }
+
+                       emit_tex(&emit, inst);
+               } else {
+                       emit_alu(&emit, &inst->U.P);
+               }
+       }
+
+       if (code->pixsize >= compiler->Base.max_temp_regs)
+               rc_error(&compiler->Base, "Too many hardware temporaries used.\n");
+
+       if (compiler->Base.Error)
+               return;
+
+       /* Finish the program.  If the last node cannot be finished, stop
+        * here instead of deriving the global registers from a half-written
+        * node. */
+       if (!finish_node(&emit))
+               return;
+
+       code->config |= emit.current_node; /* FIRST_NODE_HAS_TEX set by finish_node */
+
+       /* Set r400 extended instruction fields.  These values will be ignored
+        * on r300 cards. */
+       code->r400_code_offset_ext |=
+               (get_msbs_alu(0)
+                               << R400_ALU_OFFSET_MSB_SHIFT)
+               | (get_msbs_alu(code->alu.length - 1)
+                               << R400_ALU_SIZE_MSB_SHIFT);
+
+       /* NOTE(review): finish_node() splits TEX values at 5 LSBs while the
+        * size below is split at 6 -- confirm against the width of
+        * R300_PFS_CNTL_TEX_END_MASK. */
+       tex_end = code->tex.length ? code->tex.length - 1 : 0;
+       code->code_offset =
+               ((0 << R300_PFS_CNTL_ALU_OFFSET_SHIFT)
+                       & R300_PFS_CNTL_ALU_OFFSET_MASK)
+               | (((code->alu.length - 1) << R300_PFS_CNTL_ALU_END_SHIFT)
+                       & R300_PFS_CNTL_ALU_END_MASK)
+               | ((0 << R300_PFS_CNTL_TEX_OFFSET_SHIFT)
+                       & R300_PFS_CNTL_TEX_OFFSET_MASK)
+               | ((tex_end << R300_PFS_CNTL_TEX_END_SHIFT)
+                       & R300_PFS_CNTL_TEX_END_MASK)
+               | (get_msbs_tex(0, 5) << R400_TEX_START_MSB_SHIFT)
+               | (get_msbs_tex(tex_end, 6) << R400_TEX_SIZE_MSB_SHIFT)
+               ;
+
+       /* The node words were written to code_addr[0..current_node].  Move
+        * them up so that the last node ends in code_addr[3] and clear the
+        * slots that became unused. */
+       if (emit.current_node < 3) {
+               int shift = 3 - emit.current_node;
+               int i;
+               for(i = emit.current_node; i >= 0; --i)
+                       code->code_addr[shift + i] = code->code_addr[i];
+               for(i = 0; i < shift; ++i)
+                       code->code_addr[i] = 0;
+       }
+
+       /* Programs that exceed any of the basic r300 limits are flagged as
+        * needing r390 mode. */
+       if (code->pixsize >= R300_PFS_NUM_TEMP_REGS
+           || code->alu.length > R300_PFS_MAX_ALU_INST
+           || code->tex.length > R300_PFS_MAX_TEX_INST) {
+
+               code->r390_mode = 1;
+       }
+}
diff --git a/src/gallium/drivers/r300/compiler/r300_fragprog_swizzle.c b/src/gallium/drivers/r300/compiler/r300_fragprog_swizzle.c
new file mode 100644 (file)
index 0000000..b7bca8c
--- /dev/null
@@ -0,0 +1,243 @@
+/*
+ * Copyright (C) 2008 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/**
+ * @file
+ * Utilities to deal with the somewhat odd restriction on R300 fragment
+ * program swizzles.
+ */
+
+#include "r300_fragprog_swizzle.h"
+
+#include <stdio.h>
+
+#include "../r300_reg.h"
+#include "radeon_compiler.h"
+
+/* Build a swizzle from three component names (pasted onto RC_SWIZZLE_);
+ * the fourth component is always RC_SWIZZLE_ZERO. */
+#define MAKE_SWZ3(x, y, z) (RC_MAKE_SWIZZLE(RC_SWIZZLE_##x, RC_SWIZZLE_##y, RC_SWIZZLE_##z, RC_SWIZZLE_ZERO))
+
+/** One entry of the native RGB swizzle table. */
+struct swizzle_data {
+       unsigned int hash; /**< swizzle value this matches */
+       unsigned int base; /**< base value for hw swizzle */
+       unsigned int stride; /**< difference in base between arg0/1/2 */
+       unsigned int srcp_stride; /**< difference in base between arg0/srcp */
+};
+
+/**
+ * RGB swizzles that can be encoded directly.  Each row gives the swizzle
+ * pattern, the hardware code for source 0, the step to the codes of
+ * sources 1 and 2, and the step to the presubtract-source code.  A
+ * srcp_stride of 0 is treated by the users of this table as "not
+ * available for the presubtract source".
+ */
+static const struct swizzle_data native_swizzles[] = {
+       {MAKE_SWZ3(X, Y, Z), R300_ALU_ARGC_SRC0C_XYZ, 4, 15},
+       {MAKE_SWZ3(X, X, X), R300_ALU_ARGC_SRC0C_XXX, 4, 15},
+       {MAKE_SWZ3(Y, Y, Y), R300_ALU_ARGC_SRC0C_YYY, 4, 15},
+       {MAKE_SWZ3(Z, Z, Z), R300_ALU_ARGC_SRC0C_ZZZ, 4, 15},
+       {MAKE_SWZ3(W, W, W), R300_ALU_ARGC_SRC0A, 1, 7},
+       {MAKE_SWZ3(Y, Z, X), R300_ALU_ARGC_SRC0C_YZX, 1, 0},
+       {MAKE_SWZ3(Z, X, Y), R300_ALU_ARGC_SRC0C_ZXY, 1, 0},
+       {MAKE_SWZ3(W, Z, Y), R300_ALU_ARGC_SRC0CA_WZY, 1, 0},
+       {MAKE_SWZ3(ONE, ONE, ONE), R300_ALU_ARGC_ONE, 0, 0},
+       {MAKE_SWZ3(ZERO, ZERO, ZERO), R300_ALU_ARGC_ZERO, 0, 0},
+       {MAKE_SWZ3(HALF, HALF, HALF), R300_ALU_ARGC_HALF, 0, 0}
+};
+
+/* Number of rows in native_swizzles. */
+static const int num_native_swizzles = sizeof(native_swizzles)/sizeof(native_swizzles[0]);
+
+/**
+ * Find a native RGB swizzle that matches the given swizzle.
+ *
+ * Only the first three components are compared, and components set to
+ * RC_SWIZZLE_UNUSED match anything.  The first matching table row wins.
+ * Returns 0 if none found.
+ */
+static const struct swizzle_data* lookup_native_swizzle(unsigned int swizzle)
+{
+       const struct swizzle_data *entry = native_swizzles;
+       const struct swizzle_data *end = native_swizzles + num_native_swizzles;
+
+       for(; entry != end; ++entry) {
+               int matches = 1;
+               int comp;
+
+               for(comp = 0; comp < 3 && matches; ++comp) {
+                       const unsigned int wanted = GET_SWZ(swizzle, comp);
+
+                       if (wanted != RC_SWIZZLE_UNUSED &&
+                           wanted != GET_SWZ(entry->hash, comp))
+                               matches = 0;
+               }
+
+               if (matches)
+                       return entry;
+       }
+
+       return 0;
+}
+
+/**
+ * Determines if the given swizzle is valid for r300/r400, looking only at
+ * the swizzle itself.  In most situations it is better to use
+ * r300_swizzle_is_native(), which can be accessed via
+ * struct radeon_compiler *c; c->SwizzleCaps->IsNative().
+ *
+ * @return 1 if a native swizzle matches, 0 otherwise.
+ */
+int r300_swizzle_is_native_basic(unsigned int swizzle)
+{
+       return lookup_native_swizzle(swizzle) != 0;
+}
+
+/**
+ * Check whether the given instruction supports the swizzle and negate
+ * combinations in the given source register.
+ *
+ * Texture opcodes (KIL, TEX, TXB, TXP) accept neither Abs nor Negate, and
+ * every used component must select itself.  For all other opcodes the
+ * negate bits of the used XYZ components must be all set or all clear, and
+ * the swizzle must be in the native table (with a presubtract encoding
+ * when the register file is RC_FILE_PRESUB).
+ *
+ * @return 1 if the source is natively supported, 0 otherwise.
+ */
+static int r300_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg)
+{
+       const struct swizzle_data* sd;
+       unsigned int used = 0;
+       unsigned int negated;
+       int is_tex;
+       int j;
+
+       switch(opcode) {
+       case RC_OPCODE_KIL:
+       case RC_OPCODE_TEX:
+       case RC_OPCODE_TXB:
+       case RC_OPCODE_TXP:
+               is_tex = 1;
+               break;
+       default:
+               is_tex = 0;
+               break;
+       }
+
+       if (is_tex) {
+               if (reg.Abs || reg.Negate)
+                       return 0;
+
+               for(j = 0; j < 4; ++j) {
+                       unsigned int swz = GET_SWZ(reg.Swizzle, j);
+                       if (swz != RC_SWIZZLE_UNUSED && swz != j)
+                               return 0;
+               }
+
+               return 1;
+       }
+
+       /* Collect the XYZ components that are actually read. */
+       for(j = 0; j < 3; ++j) {
+               if (GET_SWZ(reg.Swizzle, j) != RC_SWIZZLE_UNUSED)
+                       used |= 1 << j;
+       }
+
+       /* A partial negate of the used components cannot be encoded. */
+       negated = reg.Negate & used;
+       if (negated && negated != used)
+               return 0;
+
+       sd = lookup_native_swizzle(reg.Swizzle);
+       if (!sd)
+               return 0;
+       if (reg.File == RC_FILE_PRESUB && sd->srcp_stride == 0)
+               return 0;
+
+       return 1;
+}
+
+
+/**
+ * Split the components selected by \p mask into phases, each of which can
+ * be expressed with a single native swizzle and a uniform negate state.
+ *
+ * Every round greedily picks the native swizzle that covers the most
+ * remaining XYZ components, stores the covered component mask in
+ * split->Phase[] and removes it from \p mask.  W, if requested, is added
+ * to the first phase.
+ *
+ * NOTE(review): if no table entry matches any remaining XYZ component
+ * (e.g. a masked component whose swizzle is RC_SWIZZLE_UNUSED) and W is
+ * not pending, best_matchmask stays 0 and mask is never reduced --
+ * confirm callers cannot pass such input.
+ */
+static void r300_swizzle_split(
+               struct rc_src_register src, unsigned int mask,
+               struct rc_swizzle_split * split)
+{
+       split->NumPhases = 0;
+
+       while(mask) {
+               unsigned int best_matchcount = 0;
+               unsigned int best_matchmask = 0;
+               int i, comp;
+
+               for(i = 0; i < num_native_swizzles; ++i) {
+                       const struct swizzle_data *sd = &native_swizzles[i];
+                       unsigned int matchcount = 0;
+                       unsigned int matchmask = 0;
+                       for(comp = 0; comp < 3; ++comp) {
+                               unsigned int swz;
+                               /* Only components still left in mask count. */
+                               if (!GET_BIT(mask, comp))
+                                       continue;
+                               swz = GET_SWZ(src.Swizzle, comp);
+                               if (swz == RC_SWIZZLE_UNUSED)
+                                       continue;
+                               if (swz == GET_SWZ(sd->hash, comp)) {
+                                       /* check if the negate bit of current component
+                                        * is the same for already matched components */
+                                       if (matchmask && (!!(src.Negate & matchmask) != !!(src.Negate & (1 << comp))))
+                                               continue;
+
+                                       matchcount++;
+                                       matchmask |= 1 << comp;
+                               }
+                       }
+                       if (matchcount > best_matchcount) {
+                               best_matchcount = matchcount;
+                               best_matchmask = matchmask;
+                               /* All remaining XYZ components are covered:
+                                * no better candidate is possible. */
+                               if (matchmask == (mask & RC_MASK_XYZ))
+                                       break;
+                       }
+               }
+
+               /* W is attached to whichever phase is emitted while it is
+                * still pending, i.e. the first one. */
+               if (mask & RC_MASK_W)
+                       best_matchmask |= RC_MASK_W;
+
+               split->Phase[split->NumPhases++] = best_matchmask;
+               mask &= ~best_matchmask;
+       }
+}
+
+/* Swizzle capability callbacks for r300 fragment programs: the native
+ * check and the phase splitter defined in this file. */
+struct rc_swizzle_caps r300_swizzle_caps = {
+       .IsNative = r300_swizzle_is_native,
+       .Split = r300_swizzle_split
+};
+
+
+/**
+ * Translate an RGB (XYZ) swizzle into the hardware code for the given
+ * instruction source.
+ *
+ * Regular sources use the table row's base plus src times its stride; the
+ * presubtract source uses base plus srcp_stride.  If the swizzle is not in
+ * the native table, or has no presubtract encoding when one is needed, a
+ * message is printed to stderr and 0 is returned.
+ */
+unsigned int r300FPTranslateRGBSwizzle(unsigned int src, unsigned int swizzle)
+{
+       const struct swizzle_data* sd = lookup_native_swizzle(swizzle);
+
+       if (sd && src != RC_PAIR_PRESUB_SRC)
+               return sd->base + src*sd->stride;
+
+       if (sd && sd->srcp_stride != 0)
+               return sd->base + sd->srcp_stride;
+
+       fprintf(stderr, "Not a native swizzle: %08x\n", swizzle);
+       return 0;
+}
+
+
+/**
+ * Translate an Alpha (W) swizzle into the hardware code for the given
+ * instruction source.
+ *
+ * Only the first component of \p swizzle is looked at.  For the
+ * presubtract source it is added to R300_ALU_ARGA_SRCP_X.  Otherwise
+ * component values below 3 (presumably X, Y, Z) give swz + 3*src, W gives
+ * R300_ALU_ARGA_SRC0A + src, and ONE/ZERO/HALF give the matching constant
+ * code.  Any other value falls back to R300_ALU_ARGA_ONE.
+ */
+unsigned int r300FPTranslateAlphaSwizzle(unsigned int src, unsigned int swizzle)
+{
+       const unsigned int comp = GET_SWZ(swizzle, 0);
+
+       if (src == RC_PAIR_PRESUB_SRC)
+               return R300_ALU_ARGA_SRCP_X + comp;
+
+       if (comp >= 3) {
+               switch(comp) {
+               case RC_SWIZZLE_W:
+                       return R300_ALU_ARGA_SRC0A + src;
+               case RC_SWIZZLE_ZERO:
+                       return R300_ALU_ARGA_ZERO;
+               case RC_SWIZZLE_HALF:
+                       return R300_ALU_ARGA_HALF;
+               case RC_SWIZZLE_ONE:
+               default:
+                       return R300_ALU_ARGA_ONE;
+               }
+       }
+
+       return comp + 3*src;
+}
diff --git a/src/gallium/drivers/r300/compiler/r300_fragprog_swizzle.h b/src/gallium/drivers/r300/compiler/r300_fragprog_swizzle.h
new file mode 100644 (file)
index 0000000..f2635be
--- /dev/null
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2008 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __R300_FRAGPROG_SWIZZLE_H_
+#define __R300_FRAGPROG_SWIZZLE_H_
+
+#include "radeon_swizzle.h"
+
+/* Swizzle callbacks (IsNative / Split) for the R300 fragment program path. */
+extern struct rc_swizzle_caps r300_swizzle_caps;
+
+/* Hardware RGB argument code for `swizzle` read from instruction source
+ * `src`; 0 is returned (with a message on stderr) if it is not native. */
+unsigned int r300FPTranslateRGBSwizzle(unsigned int src, unsigned int swizzle);
+/* Hardware Alpha argument code for the first component of `swizzle` read
+ * from instruction source `src`. */
+unsigned int r300FPTranslateAlphaSwizzle(unsigned int src, unsigned int swizzle);
+/* NOTE(review): presumably reports whether `swizzle` is one of the natively
+ * supported swizzles -- the definition is not visible here, confirm. */
+int r300_swizzle_is_native_basic(unsigned int swizzle);
+
+#endif /* __R300_FRAGPROG_SWIZZLE_H_ */
diff --git a/src/gallium/drivers/r300/compiler/r3xx_fragprog.c b/src/gallium/drivers/r300/compiler/r3xx_fragprog.c
new file mode 100644 (file)
index 0000000..bb6c010
--- /dev/null
@@ -0,0 +1,172 @@
+/*
+ * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "radeon_compiler.h"
+
+#include <stdio.h>
+
+#include "radeon_compiler_util.h"
+#include "radeon_dataflow.h"
+#include "radeon_emulate_branches.h"
+#include "radeon_emulate_loops.h"
+#include "radeon_program_alu.h"
+#include "radeon_program_tex.h"
+#include "radeon_rename_regs.h"
+#include "radeon_remove_constants.h"
+#include "r300_fragprog.h"
+#include "r300_fragprog_swizzle.h"
+#include "r500_fragprog.h"
+
+
+/**
+ * Report every fragment program output to \p callback: the four color
+ * outputs with all channels, then the depth output with only W.
+ *
+ * \param userdata the r300_fragment_program_compiler being compiled
+ * \param data     opaque cookie forwarded to \p callback unchanged
+ */
+static void dataflow_outputs_mark_use(void * userdata, void * data,
+               void (*callback)(void *, unsigned int, unsigned int))
+{
+       struct r300_fragment_program_compiler *fpc = userdata;
+       unsigned int color;
+
+       for (color = 0; color < 4; color++)
+               callback(data, fpc->OutputColor[color], RC_MASK_XYZW);
+
+       callback(data, fpc->OutputDepth, RC_MASK_W);
+}
+
+/**
+ * Compiler pass: move writes to the depth output from the Z channel to W.
+ *
+ * For each instruction whose destination is the depth output:
+ *  - if Z is not written, the write mask is cleared entirely;
+ *  - otherwise the write mask becomes W only and, for componentwise
+ *    opcodes, every source swizzle is composed with ZZZZ so the value that
+ *    used to land in Z is the one computed for W.
+ *
+ * \param user unused
+ */
+static void rc_rewrite_depth_out(struct radeon_compiler *cc, void *user)
+{
+       struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc;
+       struct rc_instruction *const list_head = &c->Base.Program.Instructions;
+       struct rc_instruction *cur;
+
+       for (cur = list_head->Next; cur != list_head; cur = cur->Next) {
+               struct rc_sub_instruction *sub = &cur->U.I;
+               const struct rc_opcode_info *opinfo = rc_get_opcode_info(sub->Opcode);
+               unsigned s;
+
+               /* Only instructions that write the depth output are touched. */
+               if (sub->DstReg.File != RC_FILE_OUTPUT)
+                       continue;
+               if (sub->DstReg.Index != c->OutputDepth)
+                       continue;
+
+               if (!(sub->DstReg.WriteMask & RC_MASK_Z)) {
+                       sub->DstReg.WriteMask = 0;
+                       continue;
+               }
+               sub->DstReg.WriteMask = RC_MASK_W;
+
+               if (!opinfo->IsComponentwise)
+                       continue;
+
+               for (s = 0; s < opinfo->NumSrcRegs; s++)
+                       sub->SrcReg[s] = lmul_swizzle(RC_SWIZZLE_ZZZZ, sub->SrcReg[s]);
+       }
+}
+
+/**
+ * Local transformation: force [0, 1] saturation on every instruction that
+ * writes to an output register.
+ *
+ * \return 1 if the instruction was changed, 0 if it was left alone.
+ */
+static int radeon_saturate_output(
+               struct radeon_compiler * c,
+               struct rc_instruction * inst,
+               void* data)
+{
+       const struct rc_opcode_info *opinfo = rc_get_opcode_info(inst->U.I.Opcode);
+
+       if (opinfo->HasDstReg && inst->U.I.DstReg.File == RC_FILE_OUTPUT) {
+               inst->U.I.SaturateMode = RC_SATURATE_ZERO_ONE;
+               return 1;
+       }
+
+       return 0;
+}
+
+/**
+ * Compile a fragment program: build the pass list below, hand it to
+ * rc_run_compiler(), then copy the program's constants into c->code.
+ *
+ * Three switches are read once up front and used in the PREDICATE column
+ * of the pass table: the chip family (c->Base.is_r500), whether
+ * optimizations are enabled (!c->Base.disable_optimizations) and whether
+ * output writes must be saturated (c->state.frag_clamp).
+ *
+ * NOTE(review): presumably a pass runs only when its PREDICATE value is
+ * non-zero -- confirm against rc_run_compiler().
+ */
+void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
+{
+       int is_r500 = c->Base.is_r500;
+       int opt = !c->Base.disable_optimizations;
+       int sat_out = c->state.frag_clamp;
+
+       /* Lists of instruction transformations. */
+       struct radeon_program_transformation saturate_output[] = {
+               { &radeon_saturate_output, c },
+               { 0, 0 }
+       };
+
+       struct radeon_program_transformation rewrite_tex[] = {
+               { &radeonTransformTEX, c },
+               { 0, 0 }
+       };
+
+       struct radeon_program_transformation rewrite_if[] = {
+               { &r500_transform_IF, 0 },
+               {0, 0}
+       };
+
+       struct radeon_program_transformation native_rewrite_r500[] = {
+               { &radeonTransformALU, 0 },
+               { &radeonTransformDeriv, 0 },
+               { &radeonTransformTrigScale, 0 },
+               { 0, 0 }
+       };
+
+       struct radeon_program_transformation native_rewrite_r300[] = {
+               { &radeonTransformALU, 0 },
+               { &r300_transform_trig_simple, 0 },
+               { 0, 0 }
+       };
+
+       /* List of compiler passes. */
+       /* Entries that appear twice under the same name ("native rewrite",
+        * "machine code generation", "dump machine code") carry opposite
+        * is_r500 / !is_r500 predicates, one variant per chip family. */
+       struct radeon_compiler_pass fs_list[] = {
+               /* NAME                         DUMP PREDICATE  FUNCTION                        PARAM */
+               {"rewrite depth out",           1, 1,           rc_rewrite_depth_out,           NULL},
+               /* This transformation needs to be done before any of the IF
+                * instructions are modified. */
+               {"transform KILP",              1, 1,           rc_transform_KILP,              NULL},
+               {"unroll loops",                1, is_r500,     rc_unroll_loops,                NULL},
+               {"transform loops",             1, !is_r500,    rc_transform_loops,             NULL},
+               {"emulate branches",            1, !is_r500,    rc_emulate_branches,            NULL},
+               {"saturate output writes",      1, sat_out,     rc_local_transform,             saturate_output},
+               {"transform TEX",               1, 1,           rc_local_transform,             rewrite_tex},
+               {"transform IF",                1, is_r500,     rc_local_transform,             rewrite_if},
+               {"native rewrite",              1, is_r500,     rc_local_transform,             native_rewrite_r500},
+               {"native rewrite",              1, !is_r500,    rc_local_transform,             native_rewrite_r300},
+               {"deadcode",                    1, opt,         rc_dataflow_deadcode,           dataflow_outputs_mark_use},
+               {"emulate loops",               1, !is_r500,    rc_emulate_loops,               NULL},
+               {"dataflow optimize",           1, opt,         rc_optimize,                    NULL},
+               {"dataflow swizzles",           1, 1,           rc_dataflow_swizzles,           NULL},
+               {"dead constants",              1, 1,           rc_remove_unused_constants,     &c->code->constants_remap_table},
+               /* This pass makes it easier for the scheduler to group TEX
+                * instructions and reduces the chances of creating too
+                * many texture indirections.*/
+               {"register rename",             1, !is_r500,    rc_rename_regs,                 NULL},
+               {"pair translate",              1, 1,           rc_pair_translate,              NULL},
+               {"pair scheduling",             1, 1,           rc_pair_schedule,               NULL},
+               {"dead sources",                1, 1,           rc_pair_remove_dead_sources, NULL},
+               {"register allocation",         1, 1,           rc_pair_regalloc,               &opt},
+               {"final code validation",       0, 1,           rc_validate_final_shader,       NULL},
+               {"machine code generation",     0, is_r500,     r500BuildFragmentProgramHwCode, NULL},
+               {"machine code generation",     0, !is_r500,    r300BuildFragmentProgramHwCode, NULL},
+               {"dump machine code",           0, is_r500  && (c->Base.Debug & RC_DBG_LOG), r500FragmentProgramDump, NULL},
+               {"dump machine code",           0, !is_r500 && (c->Base.Debug & RC_DBG_LOG), r300FragmentProgramDump, NULL},
+               {NULL, 0, 0, NULL, NULL}
+       };
+
+       c->Base.type = RC_FRAGMENT_PROGRAM;
+       /* Pick the swizzle capability table matching the chip family. */
+       c->Base.SwizzleCaps = c->Base.is_r500 ? &r500_swizzle_caps : &r300_swizzle_caps;
+
+       rc_run_compiler(&c->Base, fs_list);
+
+       rc_constants_copy(&c->code->constants, &c->Base.Program.Constants);
+}
diff --git a/src/gallium/drivers/r300/compiler/r3xx_vertprog.c b/src/gallium/drivers/r300/compiler/r3xx_vertprog.c
new file mode 100644 (file)
index 0000000..654f9a0
--- /dev/null
@@ -0,0 +1,1045 @@
+/*
+ * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "radeon_compiler.h"
+
+#include <stdio.h>
+
+#include "../r300_reg.h"
+
+#include "radeon_compiler_util.h"
+#include "radeon_dataflow.h"
+#include "radeon_program_alu.h"
+#include "radeon_swizzle.h"
+#include "radeon_emulate_branches.h"
+#include "radeon_emulate_loops.h"
+#include "radeon_remove_constants.h"
+
+/* Per-loop bookkeeping record.
+ * NOTE(review): BgnLoop presumably records where the loop begins in the
+ * emitted code; its users are outside this excerpt -- confirm. */
+struct loop {
+       int BgnLoop;
+
+};
+
+/*
+ * Take an already-setup and valid source then swizzle it appropriately to
+ * obtain a constant ZERO or ONE source.
+ *
+ * x selects the instruction source (vpi->SrcReg[x]) whose register index,
+ * register class and RelAddr bit are reused; y is the swizzle selector that
+ * is replicated into all four channels. No channel is negated.
+ *
+ * The expansion refers to variables named `vp` and `vpi`, which must exist
+ * in the scope where the macro is used.
+ */
+#define __CONST(x, y)  \
+       (PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[x]),      \
+                          t_swizzle(y),        \
+                          t_swizzle(y),        \
+                          t_swizzle(y),        \
+                          t_swizzle(y),        \
+                          t_src_class(vpi->SrcReg[x].File), \
+                          RC_MASK_NONE) | (vpi->SrcReg[x].RelAddr << 4))
+
+
+/* Translate a write mask to its hardware form by keeping only the four
+ * XYZW bits (RC_MASK_* is equivalent to VSF_FLAG_*). */
+static unsigned long t_dst_mask(unsigned int mask)
+{
+       const unsigned long hw_mask = mask & RC_MASK_XYZW;
+
+       return hw_mask;
+}
+
+/* Map a destination register file to its PVS destination register class.
+ * Unknown files are reported on stderr and treated as temporaries. */
+static unsigned long t_dst_class(rc_register_file file)
+{
+       switch (file) {
+       case RC_FILE_OUTPUT:
+               return PVS_DST_REG_OUT;
+       case RC_FILE_ADDRESS:
+               return PVS_DST_REG_A0;
+       case RC_FILE_TEMPORARY:
+               break;
+       default:
+               fprintf(stderr, "%s: Bad register file %i\n", __FUNCTION__, file);
+               break;
+       }
+
+       return PVS_DST_REG_TEMPORARY;
+}
+
+/* Hardware index of a destination register: output registers are looked up
+ * in vp->outputs[], every other file uses the register index as is. */
+static unsigned long t_dst_index(struct r300_vertex_program_code *vp,
+                                struct rc_dst_register *dst)
+{
+       unsigned long hw_index = dst->Index;
+
+       if (dst->File == RC_FILE_OUTPUT)
+               hw_index = vp->outputs[dst->Index];
+
+       return hw_index;
+}
+
+/* Map a source register file to its PVS source register class.
+ * RC_FILE_NONE counts as a temporary; unknown files are reported on stderr
+ * and also treated as temporaries. */
+static unsigned long t_src_class(rc_register_file file)
+{
+       switch (file) {
+       case RC_FILE_INPUT:
+               return PVS_SRC_REG_INPUT;
+       case RC_FILE_CONSTANT:
+               return PVS_SRC_REG_CONSTANT;
+       case RC_FILE_NONE:
+       case RC_FILE_TEMPORARY:
+               break;
+       default:
+               fprintf(stderr, "%s: Bad register file %i\n", __FUNCTION__, file);
+               break;
+       }
+
+       return PVS_SRC_REG_TEMPORARY;
+}
+
+/* Return 1 when two sources of the same non-temporary register class cannot
+ * be proven to name the same register (either one is relatively addressed,
+ * or their indices differ); return 0 otherwise. */
+static int t_src_conflict(struct rc_src_register a, struct rc_src_register b)
+{
+       const unsigned long class_a = t_src_class(a.File);
+       const unsigned long class_b = t_src_class(b.File);
+
+       /* Different classes, or two temporaries, never conflict. */
+       if (class_a != class_b || class_a == PVS_SRC_REG_TEMPORARY)
+               return 0;
+
+       return a.RelAddr || b.RelAddr || a.Index != b.Index;
+}
+
+/* Identity mapping: the RC_SWIZZLE_* selectors are all identical to the
+ * VSF_IN_COMPONENT_* values, so no translation is required. */
+static inline unsigned long t_swizzle(unsigned int swizzle)
+{
+       const unsigned long hw_select = swizzle;
+
+       return hw_select;
+}
+
+/* Hardware index of a source register. Inputs are looked up in
+ * vp->inputs[] (asserted to be assigned); for every other file the register
+ * index is used directly, except that a negative index is reported on
+ * stderr and replaced by 0. */
+static unsigned long t_src_index(struct r300_vertex_program_code *vp,
+                                struct rc_src_register *src)
+{
+       if (src->File == RC_FILE_INPUT) {
+               assert(vp->inputs[src->Index] != -1);
+               return vp->inputs[src->Index];
+       }
+
+       if (src->Index < 0) {
+               fprintf(stderr,
+                       "negative offsets for indirect addressing do not work.\n");
+               return 0;
+       }
+
+       return src->Index;
+}
+
+/* these two functions should probably be merged... */
+
+/* Encode a vector source operand: all four swizzle selectors are taken from
+ * the source, and RelAddr / Abs are placed in bits 4 and 3. */
+static unsigned long t_src(struct r300_vertex_program_code *vp,
+                          struct rc_src_register *src)
+{
+       const unsigned long sel_x = t_swizzle(GET_SWZ(src->Swizzle, 0));
+       const unsigned long sel_y = t_swizzle(GET_SWZ(src->Swizzle, 1));
+       const unsigned long sel_z = t_swizzle(GET_SWZ(src->Swizzle, 2));
+       const unsigned long sel_w = t_swizzle(GET_SWZ(src->Swizzle, 3));
+
+       /* src->Negate uses the RC_MASK_ flags from program_instruction.h,
+        * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
+        */
+       return PVS_SRC_OPERAND(t_src_index(vp, src),
+                              sel_x, sel_y, sel_z, sel_w,
+                              t_src_class(src->File),
+                              src->Negate) |
+              (src->RelAddr << 4) | (src->Abs << 3);
+}
+
+/* Encode a scalar source operand: the first swizzle selector of the source
+ * is replicated into all four channels, and negation is applied either to
+ * every channel or to none. RelAddr / Abs are placed in bits 4 and 3. */
+static unsigned long t_src_scalar(struct r300_vertex_program_code *vp,
+                                 struct rc_src_register *src)
+{
+       const unsigned long sel = t_swizzle(GET_SWZ(src->Swizzle, 0));
+
+       /* Any set bit in src->Negate negates the whole (replicated) value. */
+       return PVS_SRC_OPERAND(t_src_index(vp, src),
+                              sel, sel, sel, sel,
+                              t_src_class(src->File),
+                              src->Negate ? RC_MASK_XYZW : RC_MASK_NONE) |
+              (src->RelAddr << 4) | (src->Abs << 3);
+}
+
+/* Return 0 for a write to an output register that has no hardware slot
+ * (vp->outputs[] entry is -1), 1 for everything else. Address register
+ * writes are asserted to use index 0. */
+static int valid_dst(struct r300_vertex_program_code *vp,
+                          struct rc_dst_register *dst)
+{
+       if (dst->File == RC_FILE_ADDRESS) {
+               assert(dst->Index == 0);
+               return 1;
+       }
+
+       return !(dst->File == RC_FILE_OUTPUT && vp->outputs[dst->Index] == -1);
+}
+
+/* Emit a one-source vector instruction into inst[0..3]. The two unused
+ * operand slots are filled with source 0's register swizzled to all ZERO. */
+static void ei_vector1(struct r300_vertex_program_code *vp,
+                               unsigned int hw_opcode,
+                               struct rc_sub_instruction *vpi,
+                               unsigned int * inst)
+{
+       struct rc_dst_register *dst = &vpi->DstReg;
+
+       inst[0] = PVS_OP_DST_OPERAND(hw_opcode, 0, 0,
+                                    t_dst_index(vp, dst),
+                                    t_dst_mask(dst->WriteMask),
+                                    t_dst_class(dst->File));
+       inst[1] = t_src(vp, &vpi->SrcReg[0]);
+       inst[2] = __CONST(0, RC_SWIZZLE_ZERO);
+       inst[3] = __CONST(0, RC_SWIZZLE_ZERO);
+}
+
+/* Emit a two-source vector instruction into inst[0..3]. The unused third
+ * operand slot is filled with source 1's register swizzled to all ZERO. */
+static void ei_vector2(struct r300_vertex_program_code *vp,
+                               unsigned int hw_opcode,
+                               struct rc_sub_instruction *vpi,
+                               unsigned int * inst)
+{
+       struct rc_dst_register *dst = &vpi->DstReg;
+
+       inst[0] = PVS_OP_DST_OPERAND(hw_opcode, 0, 0,
+                                    t_dst_index(vp, dst),
+                                    t_dst_mask(dst->WriteMask),
+                                    t_dst_class(dst->File));
+       inst[1] = t_src(vp, &vpi->SrcReg[0]);
+       inst[2] = t_src(vp, &vpi->SrcReg[1]);
+       inst[3] = __CONST(1, RC_SWIZZLE_ZERO);
+}
+
+/* Emit a one-source math instruction into inst[0..3]: the source is encoded
+ * as a scalar, and the two unused operand slots are filled with source 0's
+ * register swizzled to all ZERO. */
+static void ei_math1(struct r300_vertex_program_code *vp,
+                               unsigned int hw_opcode,
+                               struct rc_sub_instruction *vpi,
+                               unsigned int * inst)
+{
+       struct rc_dst_register *dst = &vpi->DstReg;
+
+       inst[0] = PVS_OP_DST_OPERAND(hw_opcode, 1, 0,
+                                    t_dst_index(vp, dst),
+                                    t_dst_mask(dst->WriteMask),
+                                    t_dst_class(dst->File));
+       inst[1] = t_src_scalar(vp, &vpi->SrcReg[0]);
+       inst[2] = __CONST(0, RC_SWIZZLE_ZERO);
+       inst[3] = __CONST(0, RC_SWIZZLE_ZERO);
+}
+
+/**
+ * Emit LIT as ME_LIGHT_COEFF_DX into inst[0..3].
+ *
+ * All three hardware operands read source 0; each uses a different
+ * permutation of the source's X, Y and W selectors with the third channel
+ * forced to zero. Negation is applied to all channels or to none, and the
+ * source's Abs flag is not encoded by this function.
+ */
+static void ei_lit(struct r300_vertex_program_code *vp,
+                                     struct rc_sub_instruction *vpi,
+                                     unsigned int * inst)
+{
+       //LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W}
+
+       inst[0] = PVS_OP_DST_OPERAND(ME_LIGHT_COEFF_DX,
+                                    1,
+                                    0,
+                                    t_dst_index(vp, &vpi->DstReg),
+                                    t_dst_mask(vpi->DstReg.WriteMask),
+                                    t_dst_class(vpi->DstReg.File));
+       /* NOTE: Users swizzling might not work. */
+       inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)),      // X
+                                 t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)),        // W
+                                 PVS_SRC_SELECT_FORCE_0,       // Z
+                                 t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)),        // Y
+                                 t_src_class(vpi->SrcReg[0].File),
+                                 vpi->SrcReg[0].Negate ? RC_MASK_XYZW : RC_MASK_NONE) |
+           (vpi->SrcReg[0].RelAddr << 4);
+       inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)),      // Y
+                                 t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)),        // W
+                                 PVS_SRC_SELECT_FORCE_0,       // Z
+                                 t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)),        // X
+                                 t_src_class(vpi->SrcReg[0].File),
+                                 vpi->SrcReg[0].Negate ? RC_MASK_XYZW : RC_MASK_NONE) |
+           (vpi->SrcReg[0].RelAddr << 4);
+       inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)),      // Y
+                                 t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)),        // X
+                                 PVS_SRC_SELECT_FORCE_0,       // Z
+                                 t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)),        // W
+                                 t_src_class(vpi->SrcReg[0].File),
+                                 vpi->SrcReg[0].Negate ? RC_MASK_XYZW : RC_MASK_NONE) |
+           (vpi->SrcReg[0].RelAddr << 4);
+}
+
+/**
+ * Emit MAD into inst[0..3].
+ *
+ * The macro opcode PVS_MACRO_OP_2CLK_MADD is chosen only when all three
+ * sources are temporaries with pairwise distinct indices; every other case
+ * uses the plain VE_MULTIPLY_ADD.
+ *
+ * Side effect: on the VE_MULTIPLY_ADD path, the Index of any source whose
+ * file is RC_FILE_NONE is overwritten in *vpi.
+ */
+static void ei_mad(struct r300_vertex_program_code *vp,
+                                     struct rc_sub_instruction *vpi,
+                                     unsigned int * inst)
+{
+       unsigned int i;
+       /* Remarks about hardware limitations of MAD
+        * (please preserve this comment, as this information is _NOT_
+        * in the documentation provided by AMD).
+        *
+        * As described in the documentation, MAD with three unique temporary
+        * source registers requires the use of the macro version.
+        *
+        * However (and this is not mentioned in the documentation), apparently
+        * the macro version is _NOT_ a full superset of the normal version.
+        * In particular, the macro version does not always work when relative
+        * addressing is used in the source operands.
+        *
+        * This limitation caused incorrect rendering in Sauerbraten's OpenGL
+        * assembly shader path when using medium quality animations
+        * (i.e. animations with matrix blending instead of quaternion blending).
+        *
+        * Unfortunately, I (nha) have been unable to extract a Piglit regression
+        * test for this issue - for some reason, it is possible to have vertex
+        * programs whose prefix is *exactly* the same as the prefix of the
+        * offending program in Sauerbraten up to the offending instruction
+        * without causing any trouble.
+        *
+        * Bottom line: Use the macro version only when really necessary;
+        * according to AMD docs, this should improve performance by one clock
+        * as a nice side bonus.
+        */
+       if (vpi->SrcReg[0].File == RC_FILE_TEMPORARY &&
+           vpi->SrcReg[1].File == RC_FILE_TEMPORARY &&
+           vpi->SrcReg[2].File == RC_FILE_TEMPORARY &&
+           vpi->SrcReg[0].Index != vpi->SrcReg[1].Index &&
+           vpi->SrcReg[0].Index != vpi->SrcReg[2].Index &&
+           vpi->SrcReg[1].Index != vpi->SrcReg[2].Index) {
+               inst[0] = PVS_OP_DST_OPERAND(PVS_MACRO_OP_2CLK_MADD,
+                               0,
+                               1,
+                               t_dst_index(vp, &vpi->DstReg),
+                               t_dst_mask(vpi->DstReg.WriteMask),
+                               t_dst_class(vpi->DstReg.File));
+       } else {
+               inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD,
+                               0,
+                               0,
+                               t_dst_index(vp, &vpi->DstReg),
+                               t_dst_mask(vpi->DstReg.WriteMask),
+                               t_dst_class(vpi->DstReg.File));
+
+               /* Arguments with constant swizzles still count as a unique
+                * temporary, so we should make sure these arguments share a
+                * register index with one of the other arguments. */
+               for (i = 0; i < 3; i++) {
+                       unsigned int j;
+                       if (vpi->SrcReg[i].File != RC_FILE_NONE)
+                               continue;
+
+                       /* Takes the index of the first source other than i:
+                        * source 1 when i is 0, source 0 otherwise. */
+                       for (j = 0; j < 3; j++) {
+                               if (i != j) {
+                                       vpi->SrcReg[i].Index =
+                                               vpi->SrcReg[j].Index;
+                                       break;
+                               }
+                       }
+               }
+       }
+       inst[1] = t_src(vp, &vpi->SrcReg[0]);
+       inst[2] = t_src(vp, &vpi->SrcReg[1]);
+       inst[3] = t_src(vp, &vpi->SrcReg[2]);
+}
+
+/* Emit POW as ME_POWER_FUNC_FF into inst[0..3]. Source 0 goes into the
+ * first operand slot and source 1 into the third, both encoded as scalars;
+ * the middle slot is source 0's register swizzled to all ZERO. */
+static void ei_pow(struct r300_vertex_program_code *vp,
+                                     struct rc_sub_instruction *vpi,
+                                     unsigned int * inst)
+{
+       struct rc_dst_register *dst = &vpi->DstReg;
+
+       inst[0] = PVS_OP_DST_OPERAND(ME_POWER_FUNC_FF, 1, 0,
+                                    t_dst_index(vp, dst),
+                                    t_dst_mask(dst->WriteMask),
+                                    t_dst_class(dst->File));
+       inst[1] = t_src_scalar(vp, &vpi->SrcReg[0]);
+       inst[2] = __CONST(0, RC_SWIZZLE_ZERO);
+       inst[3] = t_src_scalar(vp, &vpi->SrcReg[1]);
+}
+
+/* Write callback: accumulate the channels written to each temporary.
+ * userdata is an array of per-register write masks; writes to other
+ * register files, or to indices at or beyond R300_VS_MAX_TEMPS, are
+ * ignored. */
+static void mark_write(void * userdata,        struct rc_instruction * inst,
+               rc_register_file file,  unsigned int index, unsigned int mask)
+{
+       unsigned int *written = userdata;
+
+       if (file == RC_FILE_TEMPORARY && index < R300_VS_MAX_TEMPS)
+               written[index] |= mask;
+}
+
+/* Encode the predicate register as a source operand: a temporary at
+ * compiler->PredicateIndex, swizzled (ZERO, ZERO, ZERO, W), not negated. */
+static unsigned long t_pred_src(struct r300_vertex_program_compiler * compiler)
+{
+       const unsigned long sel_zero = t_swizzle(RC_SWIZZLE_ZERO);
+       const unsigned long sel_w = t_swizzle(RC_SWIZZLE_W);
+
+       return PVS_SRC_OPERAND(compiler->PredicateIndex,
+               sel_zero, sel_zero, sel_zero, sel_w,
+               t_src_class(RC_FILE_TEMPORARY),
+               0);
+}
+
+/* Encode an opcode/destination word that writes only the W channel of the
+ * predicate register (a temporary at compiler->PredicateIndex). is_math is
+ * passed through as the math-instruction argument of PVS_OP_DST_OPERAND. */
+static unsigned long t_pred_dst(struct r300_vertex_program_compiler * compiler,
+                                       unsigned int hw_opcode, int is_math)
+{
+       const unsigned long reg_class = t_dst_class(RC_FILE_TEMPORARY);
+
+       return PVS_OP_DST_OPERAND(hw_opcode, is_math, 0,
+            compiler->PredicateIndex, RC_MASK_W, reg_class);
+}
+
+/**
+ * Emit the predicate instruction for an IF into inst[0..3].
+ *
+ * Only supported on R500; on other chips an error is recorded with
+ * rc_error() and nothing is written.
+ *
+ * On the first call (compiler->PredicateMask still zero) a temporary whose
+ * W channel is never written by the program is reserved as the predicate
+ * register; if none exists an error is recorded and nothing is written.
+ *
+ * The first source of the IF is rewritten in place so that its first
+ * swizzle component is smeared across all channels.
+ *
+ * \param branch_depth number of enclosing IFs; 0 selects the math opcode
+ *                     ME_PRED_SET_NEQ, anything else the vector opcode
+ *                     VE_PRED_SET_NEQ_PUSH with the predicate register as
+ *                     the first operand.
+ */
+static void ei_if(struct r300_vertex_program_compiler * compiler,
+                                       struct rc_instruction *rci,
+                                       unsigned int * inst,
+                                       unsigned int branch_depth)
+{
+       unsigned int predicate_opcode;
+       int is_math = 0;
+
+       if (!compiler->Base.is_r500) {
+               rc_error(&compiler->Base,"Opcode IF not supported\n");
+               return;
+       }
+
+       /* Reserve a temporary to use as our predicate stack counter, if we
+        * don't already have one. */
+       if (!compiler->PredicateMask) {
+               unsigned int writemasks[RC_REGISTER_MAX_INDEX];
+               /* Named differently from the `inst` parameter on purpose:
+                * this walks the program, it is not the output buffer. */
+               struct rc_instruction * scan;
+               unsigned int i;
+               memset(writemasks, 0, sizeof(writemasks));
+               for(scan = compiler->Base.Program.Instructions.Next;
+                               scan != &compiler->Base.Program.Instructions;
+                                                       scan = scan->Next) {
+                       rc_for_all_writes_mask(scan, mark_write, writemasks);
+               }
+               for(i = 0; i < compiler->Base.max_temp_regs; i++) {
+                       unsigned int mask = ~writemasks[i] & RC_MASK_XYZW;
+                       /* Only the W component can be used for the predicate
+                        * stack counter. */
+                       if (mask & RC_MASK_W) {
+                               compiler->PredicateMask = RC_MASK_W;
+                               compiler->PredicateIndex = i;
+                               break;
+                       }
+               }
+               if (i == compiler->Base.max_temp_regs) {
+                       rc_error(&compiler->Base, "No free temporary to use for"
+                                       " predicate stack counter.\n");
+                       return;
+               }
+       }
+
+       rci->U.I.SrcReg[0].Swizzle = RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(rci->U.I.SrcReg[0].Swizzle,0));
+       if (branch_depth == 0) {
+               /* Outermost IF: set the predicate directly from the condition. */
+               is_math = 1;
+               predicate_opcode = ME_PRED_SET_NEQ;
+               inst[1] = t_src(compiler->code, &rci->U.I.SrcReg[0]);
+               inst[2] = 0;
+       } else {
+               /* Nested IF: the predicate register is an input as well. */
+               predicate_opcode = VE_PRED_SET_NEQ_PUSH;
+               inst[1] = t_pred_src(compiler);
+               inst[2] = t_src(compiler->code, &rci->U.I.SrcReg[0]);
+       }
+
+       inst[0] = t_pred_dst(compiler, predicate_opcode, is_math);
+       inst[3] = 0;
+
+}
+
+/* Emit the predicate instruction for an ELSE (ME_PRED_SET_INV on the
+ * predicate register) into inst[0..3]. On chips other than R500 an error
+ * is recorded with rc_error() and nothing is written. */
+static void ei_else(struct r300_vertex_program_compiler * compiler,
+                                                       unsigned int * inst)
+{
+       if (compiler->Base.is_r500) {
+               inst[0] = t_pred_dst(compiler, ME_PRED_SET_INV, 1);
+               inst[1] = t_pred_src(compiler);
+               inst[2] = 0;
+               inst[3] = 0;
+               return;
+       }
+
+       rc_error(&compiler->Base,"Opcode ELSE not supported\n");
+}
+
+/* Emit the predicate instruction for an ENDIF (ME_PRED_SET_POP on the
+ * predicate register) into inst[0..3]. On chips other than R500 an error
+ * is recorded with rc_error() and nothing is written. */
+static void ei_endif(struct r300_vertex_program_compiler *compiler,
+                                                       unsigned int * inst)
+{
+       if (compiler->Base.is_r500) {
+               inst[0] = t_pred_dst(compiler, ME_PRED_SET_POP, 1);
+               inst[1] = t_pred_src(compiler);
+               inst[2] = 0;
+               inst[3] = 0;
+               return;
+       }
+
+       rc_error(&compiler->Base,"Opcode ENDIF not supported\n");
+}
+
+/**
+ * Compiler pass that emits the final vertex program machine code.
+ *
+ * Walks the instruction list and encodes every instruction into four dwords
+ * appended to compiler->code->body.d; code->length counts dwords.
+ * BGNLOOP and ENDLOOP emit no instruction dwords of their own: BGNLOOP pushes
+ * an entry on a local loop stack and ENDLOOP turns that entry into one flow
+ * control op (fc_op_addrs / fc_loop_index / fc_ops, indexed by num_fc_ops).
+ * code->num_temporaries is raised to cover every temporary that is read or
+ * written, plus the predicate register when PredicateMask is set.
+ *
+ * Problems are reported through rc_error().
+ *
+ * \param c     the compiler; cast to r300_vertex_program_compiler
+ * \param user  unused
+ */
+static void translate_vertex_program(struct radeon_compiler *c, void *user)
+{
+       struct r300_vertex_program_compiler *compiler = (struct r300_vertex_program_compiler*)c;
+       struct rc_instruction *rci;
+
+       /* Stack of currently open loops, allocated from the compiler pool. */
+       struct loop * loops = NULL;
+       int current_loop_depth = 0;
+       int loops_reserved = 0;
+
+       /* Number of IF blocks the current instruction is nested in. */
+       unsigned int branch_depth = 0;
+
+       compiler->code->pos_end = 0;    /* Not supported yet */
+       compiler->code->length = 0;
+       compiler->code->num_temporaries = 0;
+
+       compiler->SetHwInputOutput(compiler);
+
+       for(rci = compiler->Base.Program.Instructions.Next; rci != &compiler->Base.Program.Instructions; rci = rci->Next) {
+               struct rc_sub_instruction *vpi = &rci->U.I;
+               unsigned int *inst = compiler->code->body.d + compiler->code->length;
+               const struct rc_opcode_info *info = rc_get_opcode_info(vpi->Opcode);
+
+               /* Skip instructions writing to non-existing destination */
+               if (!valid_dst(compiler->code, &vpi->DstReg))
+                       continue;
+
+               if (info->HasDstReg) {
+                       /* The Saturate modifier is not handled by this backend. */
+                       if (vpi->SaturateMode != RC_SATURATE_NONE) {
+                               rc_error(&compiler->Base, "Vertex program does not support the Saturate "
+                                        "modifier (yet).\n");
+                       }
+               }
+
+               if (compiler->code->length >= c->max_alu_insts * 4) {
+                       rc_error(&compiler->Base, "Vertex program has too many instructions\n");
+                       return;
+               }
+
+               assert(compiler->Base.is_r500 ||
+                      (vpi->Opcode != RC_OPCODE_SEQ &&
+                       vpi->Opcode != RC_OPCODE_SNE));
+
+               switch (vpi->Opcode) {
+               case RC_OPCODE_ADD: ei_vector2(compiler->code, VE_ADD, vpi, inst); break;
+               case RC_OPCODE_ARL: ei_vector1(compiler->code, VE_FLT2FIX_DX, vpi, inst); break;
+               case RC_OPCODE_COS: ei_math1(compiler->code, ME_COS, vpi, inst); break;
+               case RC_OPCODE_DP4: ei_vector2(compiler->code, VE_DOT_PRODUCT, vpi, inst); break;
+               case RC_OPCODE_DST: ei_vector2(compiler->code, VE_DISTANCE_VECTOR, vpi, inst); break;
+               case RC_OPCODE_ELSE: ei_else(compiler, inst); break;
+               case RC_OPCODE_ENDIF: ei_endif(compiler, inst); branch_depth--; break;
+               case RC_OPCODE_EX2: ei_math1(compiler->code, ME_EXP_BASE2_FULL_DX, vpi, inst); break;
+               case RC_OPCODE_EXP: ei_math1(compiler->code, ME_EXP_BASE2_DX, vpi, inst); break;
+               case RC_OPCODE_FRC: ei_vector1(compiler->code, VE_FRACTION, vpi, inst); break;
+               case RC_OPCODE_IF: ei_if(compiler, rci, inst, branch_depth); branch_depth++; break;
+               case RC_OPCODE_LG2: ei_math1(compiler->code, ME_LOG_BASE2_FULL_DX, vpi, inst); break;
+               case RC_OPCODE_LIT: ei_lit(compiler->code, vpi, inst); break;
+               case RC_OPCODE_LOG: ei_math1(compiler->code, ME_LOG_BASE2_DX, vpi, inst); break;
+               case RC_OPCODE_MAD: ei_mad(compiler->code, vpi, inst); break;
+               case RC_OPCODE_MAX: ei_vector2(compiler->code, VE_MAXIMUM, vpi, inst); break;
+               case RC_OPCODE_MIN: ei_vector2(compiler->code, VE_MINIMUM, vpi, inst); break;
+               case RC_OPCODE_MOV: ei_vector1(compiler->code, VE_ADD, vpi, inst); break;
+               case RC_OPCODE_MUL: ei_vector2(compiler->code, VE_MULTIPLY, vpi, inst); break;
+               case RC_OPCODE_POW: ei_pow(compiler->code, vpi, inst); break;
+               case RC_OPCODE_RCP: ei_math1(compiler->code, ME_RECIP_DX, vpi, inst); break;
+               case RC_OPCODE_RSQ: ei_math1(compiler->code, ME_RECIP_SQRT_DX, vpi, inst); break;
+               case RC_OPCODE_SEQ: ei_vector2(compiler->code, VE_SET_EQUAL, vpi, inst); break;
+               case RC_OPCODE_SGE: ei_vector2(compiler->code, VE_SET_GREATER_THAN_EQUAL, vpi, inst); break;
+               case RC_OPCODE_SIN: ei_math1(compiler->code, ME_SIN, vpi, inst); break;
+               case RC_OPCODE_SLT: ei_vector2(compiler->code, VE_SET_LESS_THAN, vpi, inst); break;
+               case RC_OPCODE_SNE: ei_vector2(compiler->code, VE_SET_NOT_EQUAL, vpi, inst); break;
+               case RC_OPCODE_BGNLOOP:
+               {
+                       struct loop * l;
+
+                       /* NOTE(review): this compares the reserved capacity of
+                        * the loop stack, not current_loop_depth, against the
+                        * depth limits - confirm that this is intended. */
+                       if ((!compiler->Base.is_r500
+                               && loops_reserved >= R300_VS_MAX_LOOP_DEPTH)
+                               || loops_reserved >= R500_VS_MAX_FC_DEPTH) {
+                               rc_error(&compiler->Base,
+                                               "Loops are nested too deep.");
+                               return;
+                       }
+                       memory_pool_array_reserve(&compiler->Base.Pool,
+                                       struct loop, loops, current_loop_depth,
+                                       loops_reserved, 1);
+                       l = &loops[current_loop_depth++];
+                       memset(l , 0, sizeof(struct loop));
+                       /* Remember the index of the first instruction of the
+                        * loop body; no machine instruction is emitted. */
+                       l->BgnLoop = (compiler->code->length / 4);
+                       continue;
+               }
+               case RC_OPCODE_ENDLOOP:
+               {
+                       struct loop * l;
+                       unsigned int act_addr;
+                       unsigned int last_addr;
+                       unsigned int ret_addr;
+
+                       assert(loops);
+                       l = &loops[current_loop_depth - 1];
+                       act_addr = l->BgnLoop - 1;
+                       last_addr = (compiler->code->length / 4) - 1;
+                       ret_addr = l->BgnLoop;
+
+                       /* Every ENDLOOP consumes one flow control slot and the
+                        * tables written below are indexed by num_fc_ops, so
+                        * that is the counter that has to be bounded (the
+                        * capacity of the loop stack says nothing about how
+                        * many slots are already in use). */
+                       if (compiler->code->num_fc_ops >= R300_VS_MAX_FC_OPS) {
+                               rc_error(&compiler->Base,
+                                       "Too many flow control instructions.");
+                               return;
+                       }
+                       if (compiler->Base.is_r500) {
+                               compiler->code->fc_op_addrs.r500
+                                       [compiler->code->num_fc_ops].lw =
+                                       R500_PVS_FC_ACT_ADRS(act_addr)
+                                       | R500_PVS_FC_LOOP_CNT_JMP_INST(0xffff)
+                                       ;
+                               compiler->code->fc_op_addrs.r500
+                                       [compiler->code->num_fc_ops].uw =
+                                       R500_PVS_FC_LAST_INST(last_addr)
+                                       | R500_PVS_FC_RTN_INST(ret_addr)
+                                       ;
+                       } else {
+                               compiler->code->fc_op_addrs.r300
+                                       [compiler->code->num_fc_ops] =
+                                       R300_PVS_FC_ACT_ADRS(act_addr)
+                                       | R300_PVS_FC_LOOP_CNT_JMP_INST(0xff)
+                                       | R300_PVS_FC_LAST_INST(last_addr)
+                                       | R300_PVS_FC_RTN_INST(ret_addr)
+                                       ;
+                       }
+                       compiler->code->fc_loop_index[compiler->code->num_fc_ops] =
+                               R300_PVS_FC_LOOP_INIT_VAL(0x0)
+                               | R300_PVS_FC_LOOP_STEP_VAL(0x1)
+                               ;
+                       compiler->code->fc_ops |= R300_VAP_PVS_FC_OPC_LOOP(
+                                               compiler->code->num_fc_ops);
+                       compiler->code->num_fc_ops++;
+                       current_loop_depth--;
+                       continue;
+               }
+
+               default:
+                       rc_error(&compiler->Base, "Unknown opcode %s\n", info->Name);
+                       return;
+               }
+
+               /* Non-flow control instructions that are inside an if statement
+                * need to pay attention to the predicate bit. */
+               if (branch_depth
+                       && vpi->Opcode != RC_OPCODE_IF
+                       && vpi->Opcode != RC_OPCODE_ELSE
+                       && vpi->Opcode != RC_OPCODE_ENDIF) {
+
+                       inst[0] |= (PVS_DST_PRED_ENABLE_MASK
+                                               << PVS_DST_PRED_ENABLE_SHIFT);
+                       inst[0] |= (PVS_DST_PRED_SENSE_MASK
+                                               << PVS_DST_PRED_SENSE_SHIFT);
+               }
+
+               /* Update the number of temporaries. */
+               if (info->HasDstReg && vpi->DstReg.File == RC_FILE_TEMPORARY &&
+                   vpi->DstReg.Index >= compiler->code->num_temporaries)
+                       compiler->code->num_temporaries = vpi->DstReg.Index + 1;
+
+               for (unsigned i = 0; i < info->NumSrcRegs; i++)
+                       if (vpi->SrcReg[i].File == RC_FILE_TEMPORARY &&
+                           vpi->SrcReg[i].Index >= compiler->code->num_temporaries)
+                               compiler->code->num_temporaries = vpi->SrcReg[i].Index + 1;
+
+               /* The predicate register counts as a temporary as well. */
+               if (compiler->PredicateMask)
+                       if (compiler->PredicateIndex >= compiler->code->num_temporaries)
+                               compiler->code->num_temporaries = compiler->PredicateIndex + 1;
+
+               if (compiler->code->num_temporaries > compiler->Base.max_temp_regs) {
+                       rc_error(&compiler->Base, "Too many temporaries.\n");
+                       return;
+               }
+
+               /* One instruction (four dwords) has been emitted. */
+               compiler->code->length += 4;
+
+               if (compiler->Base.Error)
+                       return;
+       }
+}
+
+/* Per-temporary bookkeeping used by allocate_temporary_registers(), indexed
+ * by the temporary's original register index. */
+struct temporary_allocation {
+       unsigned int Allocated:1; /* set once a hardware temporary has been assigned */
+       unsigned int HwTemp:15; /* index of the assigned hardware temporary */
+       struct rc_instruction * LastRead; /* last reader; the ENDLOOP for reads inside a loop */
+};
+
+/**
+ * Compiler pass: remap the program's temporary registers onto hardware
+ * temporaries.
+ *
+ * Three passes over the instruction list: count the temporaries in use,
+ * record for each one the last instruction that reads it, then assign
+ * hardware indices, handing an index back for reuse at the recorded last
+ * read. Source and destination register indices are rewritten in place.
+ *
+ * \param c     the compiler; cast to r300_vertex_program_compiler
+ * \param user  unused
+ */
+static void allocate_temporary_registers(struct radeon_compiler *c, void *user)
+{
+       struct r300_vertex_program_compiler *compiler = (struct r300_vertex_program_compiler*)c;
+       struct rc_instruction *inst;
+       struct rc_instruction *end_loop = NULL;
+       unsigned int num_orig_temps = 0;
+       /* hwtemps[j] is non-zero while hardware temporary j is in use. */
+       char hwtemps[RC_REGISTER_MAX_INDEX];
+       struct temporary_allocation * ta;
+       unsigned int i, j;
+
+       memset(hwtemps, 0, sizeof(hwtemps));
+
+       rc_recompute_ips(c);
+
+       /* Pass 1: Count original temporaries. */
+       for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
+               const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+
+               for (i = 0; i < opcode->NumSrcRegs; ++i) {
+                       if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) {
+                               if (inst->U.I.SrcReg[i].Index >= num_orig_temps)
+                                       num_orig_temps = inst->U.I.SrcReg[i].Index + 1;
+                       }
+               }
+
+               if (opcode->HasDstReg) {
+                       if (inst->U.I.DstReg.File == RC_FILE_TEMPORARY) {
+                               if (inst->U.I.DstReg.Index >= num_orig_temps)
+                                       num_orig_temps = inst->U.I.DstReg.Index + 1;
+                       }
+               }
+       }
+
+       /* One zero-initialized bookkeeping entry per original temporary. */
+       ta = (struct temporary_allocation*)memory_pool_malloc(&compiler->Base.Pool,
+                       sizeof(struct temporary_allocation) * num_orig_temps);
+       memset(ta, 0, sizeof(struct temporary_allocation) * num_orig_temps);
+
+       /* Pass 2: Determine original temporary lifetimes */
+       for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
+               const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+               /* Instructions inside of loops need to use the ENDLOOP
+                * instruction as their LastRead. */
+               if (!end_loop && inst->U.I.Opcode == RC_OPCODE_BGNLOOP) {
+                       /* Entering an outermost loop: find its matching
+                        * ENDLOOP by counting nested BGNLOOP/ENDLOOP pairs. */
+                       int endloops = 1;
+                       struct rc_instruction * ptr;
+                       for(ptr = inst->Next;
+                               ptr != &compiler->Base.Program.Instructions;
+                                                       ptr = ptr->Next){
+                               if (ptr->U.I.Opcode == RC_OPCODE_BGNLOOP) {
+                                       endloops++;
+                               } else if (ptr->U.I.Opcode == RC_OPCODE_ENDLOOP) {
+                                       endloops--;
+                                       if (endloops <= 0) {
+                                               end_loop = ptr;
+                                               break;
+                                       }
+                               }
+                       }
+               }
+
+               /* Reached the ENDLOOP of the outermost loop: leave loop mode. */
+               if (inst == end_loop) {
+                       end_loop = NULL;
+                       continue;
+               }
+
+               for (i = 0; i < opcode->NumSrcRegs; ++i) {
+                       if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) {
+                               ta[inst->U.I.SrcReg[i].Index].LastRead = end_loop ? end_loop : inst;
+                       }
+               }
+       }
+
+       /* Pass 3: Register allocation */
+       for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
+               const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+
+               /* Sources are rewritten before the destination, so a hardware
+                * temporary released by the last read in this instruction can
+                * be picked again for this instruction's destination. */
+               for (i = 0; i < opcode->NumSrcRegs; ++i) {
+                       if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) {
+                               unsigned int orig = inst->U.I.SrcReg[i].Index;
+                               inst->U.I.SrcReg[i].Index = ta[orig].HwTemp;
+
+                               if (ta[orig].Allocated && inst == ta[orig].LastRead)
+                                       hwtemps[ta[orig].HwTemp] = 0;
+                       }
+               }
+
+               if (opcode->HasDstReg) {
+                       if (inst->U.I.DstReg.File == RC_FILE_TEMPORARY) {
+                               unsigned int orig = inst->U.I.DstReg.Index;
+
+                               if (!ta[orig].Allocated) {
+                                       /* Take the lowest free hardware temporary.
+                                        * If none is free, j ends up equal to
+                                        * c->max_temp_regs. */
+                                       for(j = 0; j < c->max_temp_regs; ++j) {
+                                               if (!hwtemps[j])
+                                                       break;
+                                       }
+                                       ta[orig].Allocated = 1;
+                                       ta[orig].HwTemp = j;
+                                       hwtemps[ta[orig].HwTemp] = 1;
+                               }
+
+                               inst->U.I.DstReg.Index = ta[orig].HwTemp;
+                       }
+               }
+       }
+}
+
+/**
+ * The R3xx-R4xx vertex engine supports neither the Absolute source operand
+ * modifier nor the Saturate opcode modifier. Only Absolute is currently
+ * transformed here.
+ *
+ * Every source operand carrying Abs is replaced by a fresh temporary that a
+ * newly inserted MAX(a, -a) in front of inst fills in. Always returns 1.
+ */
+static int transform_nonnative_modifiers(
+       struct radeon_compiler *c,
+       struct rc_instruction *inst,
+       void* unused)
+{
+       const struct rc_opcode_info *info = rc_get_opcode_info(inst->U.I.Opcode);
+       unsigned src;
+
+       for (src = 0; src < info->NumSrcRegs; src++) {
+               struct rc_instruction *inst_max;
+               unsigned tmp_index;
+
+               if (!inst->U.I.SrcReg[src].Abs)
+                       continue;
+
+               /* Drop the modifier first so that the copies below are plain. */
+               inst->U.I.SrcReg[src].Abs = 0;
+
+               tmp_index = rc_find_free_temporary(c);
+
+               /* tmp = MAX(a, -a) */
+               inst_max = rc_insert_new_instruction(c, inst->Prev);
+               inst_max->U.I.Opcode = RC_OPCODE_MAX;
+               inst_max->U.I.DstReg.File = RC_FILE_TEMPORARY;
+               inst_max->U.I.DstReg.Index = tmp_index;
+               inst_max->U.I.SrcReg[0] = inst->U.I.SrcReg[src];
+               inst_max->U.I.SrcReg[1] = inst->U.I.SrcReg[src];
+               inst_max->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW;
+
+               /* The original instruction now reads the temporary instead. */
+               memset(&inst->U.I.SrcReg[src], 0, sizeof(inst->U.I.SrcReg[src]));
+               inst->U.I.SrcReg[src].File = RC_FILE_TEMPORARY;
+               inst->U.I.SrcReg[src].Index = tmp_index;
+               inst->U.I.SrcReg[src].Swizzle = RC_SWIZZLE_XYZW;
+       }
+
+       return 1;
+}
+
+/**
+ * Make source operand src of inst read from a fresh temporary: a MOV that
+ * copies the operand into the temporary is inserted in front of inst.
+ */
+static void vs_route_src_through_temp(
+       struct radeon_compiler *c,
+       struct rc_instruction *inst,
+       unsigned int src)
+{
+       int tmpreg = rc_find_free_temporary(c);
+       struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev);
+
+       inst_mov->U.I.Opcode = RC_OPCODE_MOV;
+       inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
+       inst_mov->U.I.DstReg.Index = tmpreg;
+       inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[src];
+
+       reset_srcreg(&inst->U.I.SrcReg[src]);
+       inst->U.I.SrcReg[src].File = RC_FILE_TEMPORARY;
+       inst->U.I.SrcReg[src].Index = tmpreg;
+}
+
+/**
+ * Vertex engine cannot read two inputs or two constants at the same time.
+ * Introduce intermediate MOVs to temporary registers to account for this.
+ *
+ * The third operand is checked against the other two first, then the second
+ * operand against the first. Always returns 1.
+ */
+static int transform_source_conflicts(
+       struct radeon_compiler *c,
+       struct rc_instruction* inst,
+       void* unused)
+{
+       const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+
+       if (opcode->NumSrcRegs == 3 &&
+           (t_src_conflict(inst->U.I.SrcReg[1], inst->U.I.SrcReg[2])
+            || t_src_conflict(inst->U.I.SrcReg[0], inst->U.I.SrcReg[2])))
+               vs_route_src_through_temp(c, inst, 2);
+
+       if (opcode->NumSrcRegs >= 2 &&
+           t_src_conflict(inst->U.I.SrcReg[1], inst->U.I.SrcReg[0]))
+               vs_route_src_through_temp(c, inst, 1);
+
+       return 1;
+}
+
+/**
+ * Compiler pass: make sure every required output is written.
+ *
+ * For each output set in RequiredOutputs but missing from the program's
+ * OutputsWritten, a MOV from constant 0 to that output is appended at the
+ * end of the program and the output is marked as written.
+ *
+ * \param c     the compiler; cast to r300_vertex_program_compiler
+ * \param user  unused
+ */
+static void rc_vs_add_artificial_outputs(struct radeon_compiler *c, void *user)
+{
+       struct r300_vertex_program_compiler * compiler = (struct r300_vertex_program_compiler*)c;
+       int i;
+
+       for(i = 0; i < 32; ++i) {
+               /* Unsigned mask: (1 << 31) on a signed int is undefined. */
+               const unsigned int bit = 1u << i;
+
+               if ((compiler->RequiredOutputs & bit) &&
+                   !(compiler->Base.Program.OutputsWritten & bit)) {
+                       struct rc_instruction * inst = rc_insert_new_instruction(&compiler->Base, compiler->Base.Program.Instructions.Prev);
+                       inst->U.I.Opcode = RC_OPCODE_MOV;
+
+                       inst->U.I.DstReg.File = RC_FILE_OUTPUT;
+                       inst->U.I.DstReg.Index = i;
+                       inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
+
+                       inst->U.I.SrcReg[0].File = RC_FILE_CONSTANT;
+                       inst->U.I.SrcReg[0].Index = 0;
+                       inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;
+
+                       compiler->Base.Program.OutputsWritten |= bit;
+               }
+       }
+}
+
+/**
+ * Invoke callback(data, index, RC_MASK_XYZW) for every output index that is
+ * set in the compiler's RequiredOutputs mask.
+ *
+ * \param userdata  the r300_vertex_program_compiler
+ * \param data      opaque pointer handed through to callback
+ * \param callback  called once per required output
+ */
+static void dataflow_outputs_mark_used(void * userdata, void * data,
+               void (*callback)(void *, unsigned int, unsigned int))
+{
+       struct r300_vertex_program_compiler * c = userdata;
+       int i;
+
+       for(i = 0; i < 32; ++i) {
+               /* Unsigned mask: (1 << 31) on a signed int is undefined. */
+               if (c->RequiredOutputs & (1u << i))
+                       callback(data, i, RC_MASK_XYZW);
+       }
+}
+
+/**
+ * IsNative callback of r300_vertprog_swizzle_caps: reports every swizzle as
+ * native, whatever the opcode or source register.
+ */
+static int swizzle_is_native(rc_opcode opcode, struct rc_src_register reg)
+{
+       /* Neither argument influences the answer. */
+       (void) reg;
+       (void) opcode;
+       return 1;
+}
+
+/**
+ * Remove negative relative offsets from the instructions governed by one ARL.
+ *
+ * An ADD computing (ARL source + min_offset) into a fresh temporary is
+ * inserted in front of the ARL, the ARL is redirected to read that
+ * temporary, and min_offset is subtracted from the Index of every
+ * relatively addressed source between the ARL and end.
+ *
+ * \param c           the vertex program compiler
+ * \param arl         the ARL instruction to rewrite
+ * \param end         first instruction that is no longer rewritten
+ * \param min_offset  lowest offset used with this ARL (negative at the
+ *                    call sites in this file)
+ */
+static void transform_negative_addressing(struct r300_vertex_program_compiler *c,
+                                         struct rc_instruction *arl,
+                                         struct rc_instruction *end,
+                                         int min_offset)
+{
+       struct rc_instruction *inst, *add;
+       unsigned const_swizzle;
+
+       /* Transform ARL */
+       add = rc_insert_new_instruction(&c->Base, arl->Prev);
+       add->U.I.Opcode = RC_OPCODE_ADD;
+       add->U.I.DstReg.File = RC_FILE_TEMPORARY;
+       add->U.I.DstReg.Index = rc_find_free_temporary(&c->Base);
+       add->U.I.DstReg.WriteMask = RC_MASK_X;
+       add->U.I.SrcReg[0] = arl->U.I.SrcReg[0];
+       add->U.I.SrcReg[1].File = RC_FILE_CONSTANT;
+       add->U.I.SrcReg[1].Index = rc_constants_add_immediate_scalar(&c->Base.Program.Constants,
+                                                                    min_offset, &const_swizzle);
+       add->U.I.SrcReg[1].Swizzle = const_swizzle;
+
+       arl->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+       arl->U.I.SrcReg[0].Index = add->U.I.DstReg.Index;
+       arl->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XXXX;
+       /* The ADD has taken over the original operand together with its
+        * modifiers; leaving them on the ARL would apply them a second time
+        * to the already biased value. */
+       arl->U.I.SrcReg[0].Negate = 0;
+       arl->U.I.SrcReg[0].Abs = 0;
+
+       /* Rewrite offsets up to and excluding end. */
+       for (inst = arl->Next; inst != end; inst = inst->Next) {
+               const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+
+               for (unsigned i = 0; i < opcode->NumSrcRegs; i++)
+                       if (inst->U.I.SrcReg[i].RelAddr)
+                               inst->U.I.SrcReg[i].Index -= min_offset;
+       }
+}
+
+/**
+ * Compiler pass: get rid of negative offsets in relative addressing.
+ *
+ * The program is cut into ranges that each start at an ARL. For every range
+ * the lowest negative offset of its relatively addressed sources is
+ * collected and, if there is one, the range is handed to
+ * transform_negative_addressing(). A negative relative offset that is not
+ * preceded by any ARL is reported as an error.
+ *
+ * \param compiler  the compiler; cast to r300_vertex_program_compiler
+ * \param user      unused
+ */
+static void rc_emulate_negative_addressing(struct radeon_compiler *compiler, void *user)
+{
+       struct r300_vertex_program_compiler * c = (struct r300_vertex_program_compiler*)compiler;
+       struct rc_instruction * const list_end = &c->Base.Program.Instructions;
+       struct rc_instruction *cur;
+       struct rc_instruction *pending_arl = NULL;
+       int lowest = 0;
+
+       for (cur = list_end->Next; cur != list_end; cur = cur->Next) {
+               const struct rc_opcode_info * info = rc_get_opcode_info(cur->U.I.Opcode);
+               unsigned s;
+
+               if (cur->U.I.Opcode == RC_OPCODE_ARL) {
+                       /* A new ARL closes the range of the previous one. */
+                       if (pending_arl != NULL && lowest < 0)
+                               transform_negative_addressing(c, pending_arl, cur, lowest);
+
+                       pending_arl = cur;
+                       lowest = 0;
+                       continue;
+               }
+
+               for (s = 0; s < info->NumSrcRegs; s++) {
+                       if (!inst_src_is_negative_reladdr(cur, s))
+                               continue;
+
+                       /* ARL must precede any indirect addressing. */
+                       if (pending_arl == NULL) {
+                               rc_error(&c->Base, "Vertex shader: Found relative addressing without ARL.");
+                               return;
+                       }
+
+                       if (cur->U.I.SrcReg[s].Index < lowest)
+                               lowest = cur->U.I.SrcReg[s].Index;
+               }
+       }
+
+       /* The last range extends to the end of the program. */
+       if (pending_arl != NULL && lowest < 0)
+               transform_negative_addressing(c, pending_arl, list_end, lowest);
+}
+
+/* Swizzle capabilities installed by r3xx_compile_vertex_program(). IsNative
+ * accepts every swizzle, so no Split callback is provided. */
+static struct rc_swizzle_caps r300_vertprog_swizzle_caps = {
+       .IsNative = &swizzle_is_native,
+       .Split = 0 /* should never be called */
+};
+
+/**
+ * Compile a vertex program.
+ *
+ * Builds the list of compiler passes (some of them enabled only for R500,
+ * only for pre-R500, or only when optimizations are not disabled), runs it
+ * through rc_run_compiler() and finally copies InputsRead, OutputsWritten
+ * and the constants from the program into c->code.
+ *
+ * \param c  the vertex program compiler, including the code object to fill
+ */
+void r3xx_compile_vertex_program(struct r300_vertex_program_compiler *c)
+{
+       int is_r500 = c->Base.is_r500;
+       int opt = !c->Base.disable_optimizations;
+
+       /* Lists of instruction transformations. */
+       struct radeon_program_transformation alu_rewrite_r500[] = {
+               { &r300_transform_vertex_alu, 0 },
+               { &r300_transform_trig_scale_vertex, 0 },
+               { 0, 0 }
+       };
+
+       struct radeon_program_transformation alu_rewrite_r300[] = {
+               { &r300_transform_vertex_alu, 0 },
+               { &r300_transform_trig_simple, 0 },
+               { 0, 0 }
+       };
+
+       /* Note: These passes have to be done separately from ALU rewrite,
+        * otherwise non-native ALU instructions with source conflicts
+        * or non-native modifiers will not be treated properly.
+        */
+       struct radeon_program_transformation emulate_modifiers[] = {
+               { &transform_nonnative_modifiers, 0 },
+               { 0, 0 }
+       };
+
+       struct radeon_program_transformation resolve_src_conflicts[] = {
+               { &transform_source_conflicts, 0 },
+               { 0, 0 }
+       };
+
+       /* List of compiler passes. */
+       struct radeon_compiler_pass vs_list[] = {
+               /* NAME                         DUMP PREDICATE  FUNCTION                        PARAM */
+               {"add artificial outputs",      0, 1,           rc_vs_add_artificial_outputs,   NULL},
+               {"transform loops",             1, 1,           rc_transform_loops,             NULL},
+               {"emulate branches",            1, !is_r500,    rc_emulate_branches,            NULL},
+               {"emulate negative addressing", 1, 1,           rc_emulate_negative_addressing, NULL},
+               {"native rewrite",              1, is_r500,     rc_local_transform,             alu_rewrite_r500},
+               {"native rewrite",              1, !is_r500,    rc_local_transform,             alu_rewrite_r300},
+               {"emulate modifiers",           1, !is_r500,    rc_local_transform,             emulate_modifiers},
+               {"deadcode",                    1, opt,         rc_dataflow_deadcode,           dataflow_outputs_mark_used},
+               {"dataflow optimize",           1, opt,         rc_optimize,                    NULL},
+               /* This pass must be done after optimizations. */
+               {"source conflict resolve",     1, 1,           rc_local_transform,             resolve_src_conflicts},
+               {"register allocation",         1, opt,         allocate_temporary_registers,   NULL},
+               {"dead constants",              1, 1,           rc_remove_unused_constants,     &c->code->constants_remap_table},
+               {"final code validation",       0, 1,           rc_validate_final_shader,       NULL},
+               {"machine code generation",     0, 1,           translate_vertex_program,       NULL},
+               {"dump machine code",           0, c->Base.Debug & RC_DBG_LOG, r300_vertex_program_dump,        NULL},
+               {NULL, 0, 0, NULL, NULL}
+       };
+
+       c->Base.type = RC_VERTEX_PROGRAM;
+       c->Base.SwizzleCaps = &r300_vertprog_swizzle_caps;
+
+       rc_run_compiler(&c->Base, vs_list);
+
+       /* Hand the results over to the code object. */
+       c->code->InputsRead = c->Base.Program.InputsRead;
+       c->code->OutputsWritten = c->Base.Program.OutputsWritten;
+       rc_constants_copy(&c->code->constants, &c->Base.Program.Constants);
+}
diff --git a/src/gallium/drivers/r300/compiler/r3xx_vertprog_dump.c b/src/gallium/drivers/r300/compiler/r3xx_vertprog_dump.c
new file mode 100644 (file)
index 0000000..2bc0a87
--- /dev/null
@@ -0,0 +1,207 @@
+/*
+ * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "radeon_compiler.h"
+#include "radeon_code.h"
+#include "../r300_reg.h"
+
+#include <stdio.h>
+
+/* Printable names of the vector engine opcodes, right-aligned to a common
+ * width. r300_vs_op_dump() indexes this table with the low five bits of the
+ * opcode dword, hence the 32 entries. */
+static char* r300_vs_ve_ops[] = {
+       /* R300 vector ops */
+       "                 VE_NO_OP",
+       "           VE_DOT_PRODUCT",
+       "              VE_MULTIPLY",
+       "                   VE_ADD",
+       "          VE_MULTIPLY_ADD",
+       /* Named after the VE_DISTANCE_VECTOR opcode emitted for RC_OPCODE_DST. */
+       "       VE_DISTANCE_VECTOR",
+       "              VE_FRACTION",
+       "               VE_MAXIMUM",
+       "               VE_MINIMUM",
+       "VE_SET_GREATER_THAN_EQUAL",
+       "         VE_SET_LESS_THAN",
+       "        VE_MULTIPLYX2_ADD",
+       "        VE_MULTIPLY_CLAMP",
+       "            VE_FLT2FIX_DX",
+       "        VE_FLT2FIX_DX_RND",
+       /* R500 vector ops */
+       "      VE_PRED_SET_EQ_PUSH",
+       "      VE_PRED_SET_GT_PUSH",
+       "     VE_PRED_SET_GTE_PUSH",
+       "     VE_PRED_SET_NEQ_PUSH",
+       "         VE_COND_WRITE_EQ",
+       "         VE_COND_WRITE_GT",
+       "        VE_COND_WRITE_GTE",
+       "        VE_COND_WRITE_NEQ",
+       "           VE_COND_MUX_EQ",
+       "           VE_COND_MUX_GT",
+       "          VE_COND_MUX_GTE",
+       "      VE_SET_GREATER_THAN",
+       "             VE_SET_EQUAL",
+       "         VE_SET_NOT_EQUAL",
+       "               (reserved)",
+       "               (reserved)",
+       "               (reserved)",
+};
+
+/* Printable names of the math engine opcodes, right-aligned to a common
+ * width. r300_vs_op_dump() indexes this table with the low five bits of the
+ * opcode dword, hence the 32 entries. */
+static char* r300_vs_me_ops[] = {
+       /* R300 math ops */
+       "                 ME_NO_OP",
+       "          ME_EXP_BASE2_DX",
+       "          ME_LOG_BASE2_DX",
+       "          ME_EXP_BASEE_FF",
+       "        ME_LIGHT_COEFF_DX",
+       "         ME_POWER_FUNC_FF",
+       "              ME_RECIP_DX",
+       "              ME_RECIP_FF",
+       "         ME_RECIP_SQRT_DX",
+       "         ME_RECIP_SQRT_FF",
+       "              ME_MULTIPLY",
+       "     ME_EXP_BASE2_FULL_DX",
+       "     ME_LOG_BASE2_FULL_DX",
+       " ME_POWER_FUNC_FF_CLAMP_B",
+       "ME_POWER_FUNC_FF_CLAMP_B1",
+       "ME_POWER_FUNC_FF_CLAMP_01",
+       "                   ME_SIN",
+       "                   ME_COS",
+       /* R500 math ops */
+       "        ME_LOG_BASE2_IEEE",
+       "            ME_RECIP_IEEE",
+       "       ME_RECIP_SQRT_IEEE",
+       "           ME_PRED_SET_EQ",
+       "           ME_PRED_SET_GT",
+       "          ME_PRED_SET_GTE",
+       "          ME_PRED_SET_NEQ",
+       "          ME_PRED_SET_CLR",
+       "          ME_PRED_SET_INV",
+       "          ME_PRED_SET_POP",
+       "      ME_PRED_SET_RESTORE",
+       "               (reserved)",
+       "               (reserved)",
+       "               (reserved)",
+};
+
+/* XXX refactor to avoid clashing symbols */
+/* Short tags printed after a source register number; r300_vs_src_dump()
+ * indexes this table with the low two bits of the source dword. */
+static char* r300_vs_src_debug[] = {
+       "t",
+       "i",
+       "c",
+       "a",
+};
+
+/* Short tags printed after a destination register number; r300_vs_op_dump()
+ * indexes this table with bits 8..10 of the opcode dword. */
+static char* r300_vs_dst_debug[] = {
+       "t",
+       "a0",
+       "o",
+       "ox",
+       "a",
+       "i",
+       "u",
+       "u",
+};
+
+/* Names of the eight values of a 3-bit swizzle selector; r300_vs_src_dump()
+ * looks up one entry for each of the four selectors of a source dword. */
+static char* r300_vs_swiz_debug[] = {
+       "X",
+       "Y",
+       "Z",
+       "W",
+       "0",
+       "1",
+       "U",
+       "U",
+};
+
+
+/**
+ * Print a decoded view of an instruction's opcode dword to stderr: the
+ * destination register, the predicate sense if predication is enabled, and
+ * the operation name.
+ */
+static void r300_vs_op_dump(uint32_t op)
+{
+       const uint32_t dst_index = (op >> 13) & 0x7f;
+       const uint32_t dst_tag = (op >> 8) & 0x7;
+       const uint32_t opcode = op & 0x1f;
+
+       fprintf(stderr, " dst: %d%s op: ", dst_index, r300_vs_dst_debug[dst_tag]);
+
+       if ((op >> PVS_DST_PRED_ENABLE_SHIFT) & 0x1) {
+               fprintf(stderr, "PRED %u",
+                               (op >> PVS_DST_PRED_SENSE_SHIFT) & 0x1);
+       }
+
+       /* Bit 7 selects the macro ops; they are told apart by bit 0. */
+       if (op & 0x80) {
+               if (op & 0x1)
+                       fprintf(stderr, "PVS_MACRO_OP_2CLK_M2X_ADD\n");
+               else
+                       fprintf(stderr, "   PVS_MACRO_OP_2CLK_MADD\n");
+               return;
+       }
+
+       /* Otherwise bit 6 chooses between the math and the vector table. */
+       if (op & 0x40)
+               fprintf(stderr, "%s\n", r300_vs_me_ops[opcode]);
+       else
+               fprintf(stderr, "%s\n", r300_vs_ve_ops[opcode]);
+}
+
+/**
+ * Print a decoded view of a source operand dword to stderr: the register
+ * number with its tag, followed by the four swizzle selectors, each one
+ * preceded by "-" when its negate bit is set and by a blank otherwise.
+ */
+static void r300_vs_src_dump(uint32_t src)
+{
+       /* Negate bits live at bits 25..28, one per selector. */
+       const char *neg_x = src & (1 << 25) ? "-" : " ";
+       const char *neg_y = src & (1 << 26) ? "-" : " ";
+       const char *neg_z = src & (1 << 27) ? "-" : " ";
+       const char *neg_w = src & (1 << 28) ? "-" : " ";
+
+       /* The selectors are 3 bits wide and start at bit 13. */
+       const char *sel_x = r300_vs_swiz_debug[(src >> 13) & 0x7];
+       const char *sel_y = r300_vs_swiz_debug[(src >> 16) & 0x7];
+       const char *sel_z = r300_vs_swiz_debug[(src >> 19) & 0x7];
+       const char *sel_w = r300_vs_swiz_debug[(src >> 22) & 0x7];
+
+       fprintf(stderr, " reg: %d%s swiz: %s%s/%s%s/%s%s/%s%s\n",
+                       (src >> 5) & 0xff, r300_vs_src_debug[src & 0x3],
+                       neg_x, sel_x, neg_y, sel_y, neg_z, sel_z, neg_w, sel_w);
+}
+
+/**
+ * Compiler pass: dump the generated vertex program machine code to stderr.
+ *
+ * Prints every instruction (opcode dword plus three source dwords, each raw
+ * and decoded), then the flow control word and one line per flow control op.
+ *
+ * \param compiler  the compiler; cast to r300_vertex_program_compiler
+ * \param user      unused
+ */
+void r300_vertex_program_dump(struct radeon_compiler *compiler, void *user)
+{
+       /* Names of the four values of a 2-bit flow control opcode. */
+       static const char * const fc_op_names[4] = { "NOP", "JUMP", "LOOP", "JSR" };
+
+       struct r300_vertex_program_compiler *c = (struct r300_vertex_program_compiler*)compiler;
+       struct r300_vertex_program_code * vs = c->code;
+       unsigned instrcount = vs->length / 4;
+       unsigned i;
+
+       fprintf(stderr, "Final vertex program code:\n");
+
+       for(i = 0; i < instrcount; i++) {
+               /* Each instruction is four dwords: opcode, then three sources. */
+               unsigned base = i*4;
+               unsigned src;
+
+               fprintf(stderr, "%d: op: 0x%08x", i, vs->body.d[base]);
+               r300_vs_op_dump(vs->body.d[base]);
+
+               for(src = 1; src <= 3; ++src) {
+                       fprintf(stderr, " src%i: 0x%08x", src - 1, vs->body.d[base+src]);
+                       r300_vs_src_dump(vs->body.d[base+src]);
+               }
+       }
+
+       fprintf(stderr, "Flow Control Ops: 0x%08x\n",vs->fc_ops);
+       for(i = 0; i < vs->num_fc_ops; i++) {
+               /* fc_ops holds one 2-bit opcode per flow control op. */
+               fputs(fc_op_names[(vs->fc_ops >> (i * 2)) & 0x3], stderr);
+
+               if (!c->Base.is_r500) {
+                       fprintf(stderr,": 0x%08x\n", vs->fc_op_addrs.r300[i]);
+                       continue;
+               }
+
+               fprintf(stderr,": uw-> 0x%08x lw-> 0x%08x\n",
+                       vs->fc_op_addrs.r500[i].uw,
+                       vs->fc_op_addrs.r500[i].lw);
+       }
+}
diff --git a/src/gallium/drivers/r300/compiler/r500_fragprog.c b/src/gallium/drivers/r300/compiler/r500_fragprog.c
new file mode 100644 (file)
index 0000000..cf99f5e
--- /dev/null
@@ -0,0 +1,539 @@
+/*
+ * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "r500_fragprog.h"
+
+#include <stdio.h>
+
+#include "radeon_compiler_util.h"
+#include "radeon_list.h"
+#include "radeon_variable.h"
+#include "../r300_reg.h"
+
+/**
+ * Rewrite IF instructions to use the ALU result special register.
+ *
+ * If every instruction that writes the IF condition can safely be turned
+ * into an instruction that writes the ALU result directly, those writers
+ * are rewritten in place.  Otherwise ("generic IF") a new MOV without a
+ * destination register is inserted after inst_if->Prev; it copies the
+ * condition into the ALU result using a NOTEQUAL compare.
+ * In both cases the IF source is redirected to RC_SPECIAL_ALU_RESULT.
+ *
+ * \param c        Compiler whose program contains inst_if.
+ * \param inst_if  Candidate instruction; anything but RC_OPCODE_IF is
+ *                 left untouched.
+ * \param data     Not used.
+ * \return 0 if inst_if is not an IF instruction, 1 after it was rewritten.
+ */
+int r500_transform_IF(
+       struct radeon_compiler * c,
+       struct rc_instruction * inst_if,
+       void *data)
+{
+       struct rc_variable * writer;
+       struct rc_list * writer_list, * list_ptr;
+       struct rc_list * var_list = rc_get_variables(c);
+       unsigned int generic_if = 0;
+       unsigned int alu_chan;
+
+       if (inst_if->U.I.Opcode != RC_OPCODE_IF) {
+               return 0;
+       }
+
+       writer_list = rc_variable_list_get_writers(
+                       var_list, inst_if->Type, &inst_if->U.I.SrcReg[0]);
+       if (!writer_list) {
+               generic_if = 1;
+       } else {
+
+               /* Make sure it is safe for the writers to write to
+                * ALU Result */
+               for (list_ptr = writer_list; list_ptr;
+                                               list_ptr = list_ptr->Next) {
+                       struct rc_instruction * inst;
+                       writer = list_ptr->Item;
+                       /* We are going to modify the destination register
+                        * of writer, so if it has a reader other than
+                        * inst_if (aka ReaderCount > 1) we must fall back to
+                        * our generic IF.
+                        * If the writer has a lower IP than inst_if, this
+                        * means that inst_if is above the writer in a loop.
+                        * I'm not sure why this would ever happen, but
+                        * if it does we want to make sure we fall back
+                        * to our generic IF.
+                        * NOTE(review): a lower writer IP reads like the
+                        * ordinary "writer precedes IF" case rather than
+                        * the loop case described above; confirm how IP
+                        * is assigned when this pass runs. */
+                       if (writer->ReaderCount > 1 || writer->Inst->IP < inst_if->IP) {
+                               generic_if = 1;
+                               break;
+                       }
+
+                       /* The ALU Result is not preserved across IF
+                        * instructions, so if there is another IF
+                        * instruction between writer and inst_if, then
+                        * we need to fall back to generic IF. */
+                       for (inst = writer->Inst; inst != inst_if; inst = inst->Next) {
+                               const struct rc_opcode_info * info =
+                                       rc_get_opcode_info(inst->U.I.Opcode);
+                               if (info->IsFlowControl) {
+                                       generic_if = 1;
+                                       break;
+                               }
+                       }
+                       if (generic_if) {
+                               break;
+                       }
+               }
+       }
+
+       /* Pick the ALU result channel from the first swizzle component of
+        * the IF condition. */
+       if (GET_SWZ(inst_if->U.I.SrcReg[0].Swizzle, 0) == RC_SWIZZLE_X) {
+               alu_chan = RC_ALURESULT_X;
+       } else {
+               alu_chan = RC_ALURESULT_W;
+       }
+       if (generic_if) {
+               struct rc_instruction * inst_mov =
+                               rc_insert_new_instruction(c, inst_if->Prev);
+
+               /* The MOV has no destination; it only feeds the ALU result. */
+               inst_mov->U.I.Opcode = RC_OPCODE_MOV;
+               inst_mov->U.I.DstReg.WriteMask = 0;
+               inst_mov->U.I.DstReg.File = RC_FILE_NONE;
+               inst_mov->U.I.ALUResultCompare = RC_COMPARE_FUNC_NOTEQUAL;
+               inst_mov->U.I.WriteALUResult = alu_chan;
+               inst_mov->U.I.SrcReg[0] = inst_if->U.I.SrcReg[0];
+               if (alu_chan == RC_ALURESULT_X) {
+                       inst_mov->U.I.SrcReg[0].Swizzle = combine_swizzles4(
+                                       inst_mov->U.I.SrcReg[0].Swizzle,
+                                       RC_SWIZZLE_X, RC_SWIZZLE_UNUSED,
+                                       RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED);
+               } else {
+                       inst_mov->U.I.SrcReg[0].Swizzle = combine_swizzles4(
+                                       inst_mov->U.I.SrcReg[0].Swizzle,
+                                       RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED,
+                                       RC_SWIZZLE_UNUSED, RC_SWIZZLE_Z);
+               }
+       } else {
+               for (list_ptr = writer_list; list_ptr;
+                                               list_ptr = list_ptr->Next) {
+                       /* These are decided per writer.  They must start
+                        * from a clean state on every iteration, otherwise
+                        * one writer's source swap or opcode preservation
+                        * would leak into all writers that follow it. */
+                       rc_compare_func compare_func = RC_COMPARE_FUNC_NEVER;
+                       unsigned int reverse_srcs = 0;
+                       unsigned int preserve_opcode = 0;
+
+                       writer = list_ptr->Item;
+                       switch(writer->Inst->U.I.Opcode) {
+                       case RC_OPCODE_SEQ:
+                               compare_func = RC_COMPARE_FUNC_EQUAL;
+                               break;
+                       case RC_OPCODE_SNE:
+                               compare_func = RC_COMPARE_FUNC_NOTEQUAL;
+                               break;
+                       case RC_OPCODE_SLE:
+                               reverse_srcs = 1;
+                               /* Fall through */
+                       case RC_OPCODE_SGE:
+                               compare_func = RC_COMPARE_FUNC_GEQUAL;
+                               break;
+                       case RC_OPCODE_SGT:
+                               reverse_srcs = 1;
+                               /* Fall through */
+                       case RC_OPCODE_SLT:
+                               compare_func = RC_COMPARE_FUNC_LESS;
+                               break;
+                       default:
+                               /* Not a set-on-compare opcode: keep the
+                                * instruction and test its result != 0. */
+                               compare_func = RC_COMPARE_FUNC_NOTEQUAL;
+                               preserve_opcode = 1;
+                               break;
+                       }
+                       if (!preserve_opcode) {
+                               /* The comparison becomes a subtraction whose
+                                * result is tested by compare_func. */
+                               writer->Inst->U.I.Opcode = RC_OPCODE_SUB;
+                       }
+                       writer->Inst->U.I.DstReg.WriteMask = 0;
+                       writer->Inst->U.I.DstReg.File = RC_FILE_NONE;
+                       writer->Inst->U.I.WriteALUResult = alu_chan;
+                       writer->Inst->U.I.ALUResultCompare = compare_func;
+                       if (reverse_srcs) {
+                               struct rc_src_register temp_src;
+                               temp_src = writer->Inst->U.I.SrcReg[0];
+                               writer->Inst->U.I.SrcReg[0] =
+                                       writer->Inst->U.I.SrcReg[1];
+                               writer->Inst->U.I.SrcReg[1] = temp_src;
+                       }
+               }
+       }
+
+       /* Finally make the IF read the ALU result special register. */
+       inst_if->U.I.SrcReg[0].File = RC_FILE_SPECIAL;
+       inst_if->U.I.SrcReg[0].Index = RC_SPECIAL_ALU_RESULT;
+       inst_if->U.I.SrcReg[0].Swizzle = RC_MAKE_SWIZZLE(
+                               RC_SWIZZLE_X, RC_SWIZZLE_UNUSED,
+                               RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED);
+       inst_if->U.I.SrcReg[0].Negate = 0;
+
+       return 1;
+}
+
+/**
+ * Tell whether a source register can be used as-is by the given opcode.
+ *
+ * \param opcode  Opcode that will read the register.
+ * \param reg     Source operand (passed by value; only the local copy is
+ *                modified).
+ * \return 1 if the swizzle/negate/abs combination is native, 0 otherwise.
+ */
+static int r500_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg)
+{
+       int chan;
+
+       switch (opcode) {
+       case RC_OPCODE_TEX:
+       case RC_OPCODE_TXB:
+       case RC_OPCODE_TXP:
+       case RC_OPCODE_TXD:
+       case RC_OPCODE_TXL:
+       case RC_OPCODE_KIL:
+               if (reg.Abs)
+                       return 0;
+
+               /* KIL accepts nothing but a plain, un-negated .xyzw */
+               if (opcode == RC_OPCODE_KIL &&
+                   (reg.Swizzle != RC_SWIZZLE_XYZW || reg.Negate != RC_MASK_NONE))
+                       return 0;
+
+               for (chan = 0; chan < 4; ++chan) {
+                       unsigned int sel = GET_SWZ(reg.Swizzle, chan);
+                       if (sel == RC_SWIZZLE_UNUSED) {
+                               /* Negation of an unused channel is irrelevant */
+                               reg.Negate &= ~(1 << chan);
+                       } else if (sel >= 4) {
+                               /* Only selectors below 4 are accepted here */
+                               return 0;
+                       }
+               }
+
+               return reg.Negate ? 0 : 1;
+
+       case RC_OPCODE_DDX:
+       case RC_OPCODE_DDY:
+               /* DDX/MDH and DDY/MDV explicitly ignore incoming swizzles;
+                * if it doesn't fit perfectly into a .xyzw case... */
+               return reg.Swizzle == RC_SWIZZLE_XYZW && !reg.Abs && !reg.Negate;
+
+       default: {
+               /* ALU instructions support almost everything: among the
+                * first three channels that are neither unused nor zero,
+                * negation must apply to all of them or to none. */
+               unsigned int live = 0;
+               unsigned int negated;
+
+               for (chan = 0; chan < 3; ++chan) {
+                       unsigned int sel = GET_SWZ(reg.Swizzle, chan);
+                       if (sel != RC_SWIZZLE_UNUSED && sel != RC_SWIZZLE_ZERO)
+                               live |= 1 << chan;
+               }
+
+               negated = reg.Negate & live;
+               if (negated == 0 || negated == live)
+                       return 1;
+
+               return 0;
+       }
+       }
+}
+
+/**
+ * Split source register access.
+ *
+ * The one thing an ALU instruction cannot do is negate individual
+ * components, so the used channels are grouped into at most two phases:
+ * first the non-negated channels, then the negated ones.  An empty group
+ * produces no phase.
+ *
+ * \param src      Source operand whose Swizzle and Negate are inspected.
+ * \param usemask  Channels that are actually read.
+ * \param split    Receives NumPhases and one channel mask per phase.
+ */
+static void r500_swizzle_split(struct rc_src_register src, unsigned int usemask,
+               struct rc_swizzle_split * split)
+{
+       unsigned int plain = 0;
+       unsigned int negated = 0;
+       int chan;
+
+       for (chan = 0; chan < 4; ++chan) {
+               if (GET_SWZ(src.Swizzle, chan) == RC_SWIZZLE_UNUSED)
+                       continue;
+               if (!GET_BIT(usemask, chan))
+                       continue;
+
+               if (GET_BIT(src.Negate, chan))
+                       negated |= 1 << chan;
+               else
+                       plain |= 1 << chan;
+       }
+
+       split->NumPhases = 0;
+
+       if (plain)
+               split->Phase[split->NumPhases++] = plain;
+       if (negated)
+               split->Phase[split->NumPhases++] = negated;
+}
+
+/* Swizzle capability callbacks for R500 fragment programs: the native
+ * swizzle test and the source splitting routine defined in this file. */
+struct rc_swizzle_caps r500_swizzle_caps = {
+       .IsNative = r500_swizzle_is_native,
+       .Split = r500_swizzle_split
+};
+
+/* Return the one-letter label the dump prints for a swizzle selector
+ * (0..7), or NULL for any other value. */
+static char *toswiz(int swiz_val)
+{
+       static char *const labels[8] = {
+               "R", "G", "B", "A", "0", "H", "1", "U"
+       };
+
+       if (swiz_val < 0 || swiz_val > 7)
+               return NULL;
+
+       return labels[swiz_val];
+}
+
+/* Return the mnemonic the dump prints for the 4-bit RGB opcode field.
+ *
+ * The caller masks the field with 0xf but only values 0..12 have a name.
+ * Every other value yields "UNKNOWN" instead of NULL, because the result
+ * is passed directly to a "%s" conversion. */
+static char *toop(int op_val)
+{
+  char *str = "UNKNOWN";
+  switch (op_val) {
+  case 0: str = "MAD"; break;
+  case 1: str = "DP3"; break;
+  case 2: str = "DP4"; break;
+  case 3: str = "D2A"; break;
+  case 4: str = "MIN"; break;
+  case 5: str = "MAX"; break;
+  case 6: str = "Reserved"; break;
+  case 7: str = "CND"; break;
+  case 8: str = "CMP"; break;
+  case 9: str = "FRC"; break;
+  case 10: str = "SOP"; break;
+  case 11: str = "MDH"; break;
+  case 12: str = "MDV"; break;
+  default: break;
+  }
+  return str;
+}
+
+/* Return the mnemonic the dump prints for the 4-bit alpha opcode field
+ * (0..15), or NULL for any other value. */
+static char *to_alpha_op(int op_val)
+{
+       static char *const mnemonics[16] = {
+               "MAD", "DP",  "MIN", "MAX",
+               "Reserved", "CND", "CMP", "FRC",
+               "EX2", "LN2", "RCP", "RSQ",
+               "SIN", "COS", "MDH", "MDV"
+       };
+
+       if (op_val < 0 || op_val > 15)
+               return NULL;
+
+       return mnemonics[op_val];
+}
+
+/* Return the label the dump prints for a 4-bit write mask (bit 0 = R,
+ * bit 1 = G, bit 2 = B, bit 3 = A), or NULL for a value outside 0..15. */
+static char *to_mask(int val)
+{
+       static char *const labels[16] = {
+               "NONE", "R",  "G",  "RG",
+               "B",    "RB", "GB", "RGB",
+               "A",    "AR", "AG", "ARG",
+               "AB",   "ARB", "AGB", "ARGB"
+       };
+
+       if (val < 0 || val > 15)
+               return NULL;
+
+       return labels[val];
+}
+
+/* Return the name the dump prints for the 3-bit texture opcode field.
+ *
+ * The caller masks the field with 0x7 but only values 0..6 have a name.
+ * Every other value yields "UNKNOWN" instead of NULL, because the result
+ * is passed directly to a "%s" conversion. */
+static char *to_texop(int val)
+{
+  switch(val) {
+  case 0: return "NOP";
+  case 1: return "LD";
+  case 2: return "TEXKILL";
+  case 3: return "PROJ";
+  case 4: return "LODBIAS";
+  case 5: return "LOD";
+  case 6: return "DXDY";
+  default: break;
+  }
+  return "UNKNOWN";
+}
+
+/**
+ * Print the compiled R500 fragment program to stderr in a human readable
+ * form.
+ *
+ * Instructions 0..inst_end (inclusive) are printed.  The low two bits of
+ * inst0 select the instruction type, which decides how the remaining
+ * dwords of the instruction are decoded.
+ *
+ * NOTE(review): many "%d" conversions below receive unsigned (uint32_t)
+ * operands.
+ *
+ * \param c     Cast to struct r300_fragment_program_compiler.
+ * \param user  Not used.
+ */
+void r500FragmentProgramDump(struct radeon_compiler *c, void *user)
+{
+  struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)c;
+  struct r500_fragment_program_code *code = &compiler->code->code.r500;
+  int n, i;
+  uint32_t inst;
+  uint32_t inst0;
+  char *str = NULL;
+  fprintf(stderr, "R500 Fragment Program:\n--------\n");
+
+  for (n = 0; n < code->inst_end+1; n++) {
+    /* Dword 0: fields common to every instruction type. */
+    inst0 = inst = code->inst[n].inst0;
+    fprintf(stderr,"%d\t0:CMN_INST   0x%08x:", n, inst);
+    switch(inst & 0x3) {
+    case R500_INST_TYPE_ALU: str = "ALU"; break;
+    case R500_INST_TYPE_OUT: str = "OUT"; break;
+    case R500_INST_TYPE_FC: str = "FC"; break;
+    case R500_INST_TYPE_TEX: str = "TEX"; break;
+    };
+    fprintf(stderr,"%s %s %s %s %s ", str,
+           inst & R500_INST_TEX_SEM_WAIT ? "TEX_WAIT" : "",
+           inst & R500_INST_LAST ? "LAST" : "",
+           inst & R500_INST_NOP ? "NOP" : "",
+           inst & R500_INST_ALU_WAIT ? "ALU WAIT" : "");
+    /* Bits 11..14 are printed as "wmask", bits 15..18 as "omask". */
+    fprintf(stderr,"wmask: %s omask: %s\n", to_mask((inst >> 11) & 0xf),
+           to_mask((inst >> 15) & 0xf));
+
+    switch(inst0 & 0x3) {
+    case R500_INST_TYPE_ALU:
+    case R500_INST_TYPE_OUT:
+      /* ALU and OUT instructions share the same layout for dwords 1-5. */
+      fprintf(stderr,"\t1:RGB_ADDR   0x%08x:", code->inst[n].inst1);
+      inst = code->inst[n].inst1;
+
+      fprintf(stderr,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n",
+             inst & 0xff, (inst & (1<<8)) ? 'c' : 't',
+             (inst >> 10) & 0xff, (inst & (1<<18)) ? 'c' : 't',
+             (inst >> 20) & 0xff, (inst & (1<<28)) ? 'c' : 't',
+             (inst >> 30));
+
+      fprintf(stderr,"\t2:ALPHA_ADDR 0x%08x:", code->inst[n].inst2);
+      inst = code->inst[n].inst2;
+      fprintf(stderr,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n",
+             inst & 0xff, (inst & (1<<8)) ? 'c' : 't',
+             (inst >> 10) & 0xff, (inst & (1<<18)) ? 'c' : 't',
+             (inst >> 20) & 0xff, (inst & (1<<28)) ? 'c' : 't',
+             (inst >> 30));
+      fprintf(stderr,"\t3 RGB_INST:  0x%08x:", code->inst[n].inst3);
+      inst = code->inst[n].inst3;
+      fprintf(stderr,"rgb_A_src:%d %s/%s/%s %d rgb_B_src:%d %s/%s/%s %d targ: %d\n",
+             (inst) & 0x3, toswiz((inst >> 2) & 0x7), toswiz((inst >> 5) & 0x7), toswiz((inst >> 8) & 0x7),
+             (inst >> 11) & 0x3,
+             (inst >> 13) & 0x3, toswiz((inst >> 15) & 0x7), toswiz((inst >> 18) & 0x7), toswiz((inst >> 21) & 0x7),
+             (inst >> 24) & 0x3, (inst >> 29) & 0x3);
+
+
+      fprintf(stderr,"\t4 ALPHA_INST:0x%08x:", code->inst[n].inst4);
+      inst = code->inst[n].inst4;
+      fprintf(stderr,"%s dest:%d%s alp_A_src:%d %s %d alp_B_src:%d %s %d targ %d w:%d\n", to_alpha_op(inst & 0xf),
+             (inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"",
+             (inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), (inst >> 17) & 0x3,
+             (inst >> 19) & 0x3, toswiz((inst >> 21) & 0x7), (inst >> 24) & 0x3,
+             (inst >> 29) & 0x3,
+             (inst >> 31) & 0x1);
+
+      fprintf(stderr,"\t5 RGBA_INST: 0x%08x:", code->inst[n].inst5);
+      inst = code->inst[n].inst5;
+      fprintf(stderr,"%s dest:%d%s rgb_C_src:%d %s/%s/%s %d alp_C_src:%d %s %d\n", toop(inst & 0xf),
+             (inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"",
+             (inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), toswiz((inst >> 17) & 0x7), toswiz((inst >> 20) & 0x7),
+             (inst >> 23) & 0x3,
+             (inst >> 25) & 0x3, toswiz((inst >> 27) & 0x7), (inst >> 30) & 0x3);
+      break;
+    case R500_INST_TYPE_FC:
+      /* Flow control: dword 2 holds the operation, dword 3 the addresses. */
+      fprintf(stderr, "\t2:FC_INST    0x%08x:", code->inst[n].inst2);
+      inst = code->inst[n].inst2;
+      /* JUMP_FUNC JUMP_ANY*/
+      fprintf(stderr, "0x%02x %1x ", inst >> 8 & 0xff,
+          (inst & R500_FC_JUMP_ANY) >> 5);
+      
+      /* OP */
+      switch(inst & 0x7){
+      case R500_FC_OP_JUMP:
+       fprintf(stderr, "JUMP");
+        break;
+      case R500_FC_OP_LOOP:
+        fprintf(stderr, "LOOP");
+        break;
+      case R500_FC_OP_ENDLOOP:
+        fprintf(stderr, "ENDLOOP");
+        break;
+      case R500_FC_OP_REP:
+        fprintf(stderr, "REP");
+        break;
+      case R500_FC_OP_ENDREP:
+        fprintf(stderr, "ENDREP");
+        break;
+      case R500_FC_OP_BREAKLOOP:
+        fprintf(stderr, "BREAKLOOP");
+        break;
+      case R500_FC_OP_BREAKREP:
+        fprintf(stderr, "BREAKREP");
+       break;
+      case R500_FC_OP_CONTINUE:
+        fprintf(stderr, "CONTINUE");
+        break;
+      }
+      fprintf(stderr," "); 
+      /* A_OP */
+      switch(inst & (0x3 << 6)){
+      case R500_FC_A_OP_NONE:
+        fprintf(stderr, "NONE");
+        break;
+      case R500_FC_A_OP_POP:
+       fprintf(stderr, "POP");
+        break;
+      case R500_FC_A_OP_PUSH:
+        fprintf(stderr, "PUSH");
+        break;
+      }
+      /* B_OP0 B_OP1 */
+      for(i=0; i<2; i++){
+        fprintf(stderr, " ");
+        /* The two B_OP fields are two bits each, starting at bit 24. */
+        switch(inst & (0x3 << (24 + (i * 2)))){
+        /* R500_FC_B_OP0_NONE
+        * R500_FC_B_OP1_NONE */
+       case 0:
+          fprintf(stderr, "NONE");
+          break;
+        case R500_FC_B_OP0_DECR:
+        case R500_FC_B_OP1_DECR:
+          fprintf(stderr, "DECR");
+          break;
+        case R500_FC_B_OP0_INCR:
+        case R500_FC_B_OP1_INCR:
+          fprintf(stderr, "INCR");
+          break;
+        }
+      }
+      /*POP_CNT B_ELSE */
+      fprintf(stderr, " %d %1x", (inst >> 16) & 0x1f, (inst & R500_FC_B_ELSE) >> 4);
+      inst = code->inst[n].inst3;
+      /* JUMP_ADDR */
+      /* NOTE(review): printed here without a mask, and printed again
+       * below masked with 0x1ff on the FC_ADDR line. */
+      fprintf(stderr, " %d", inst >> 16);
+      
+      if(code->inst[n].inst2 & R500_FC_IGNORE_UNCOVERED){
+        fprintf(stderr, " IGN_UNC");
+      }
+      inst = code->inst[n].inst3;
+      fprintf(stderr, "\n\t3:FC_ADDR    0x%08x:", inst);
+      fprintf(stderr, "BOOL: 0x%02x, INT: 0x%02x, JUMP_ADDR: %d, JMP_GLBL: %1x\n",
+      inst & 0x1f, (inst >> 8) & 0x1f, (inst >> 16) & 0x1ff, inst >> 31); 
+      break;
+    case R500_INST_TYPE_TEX:
+      /* Texture instruction: dword 1 is the operation, dword 2 the
+       * source/destination addressing, dword 3 is printed raw. */
+      inst = code->inst[n].inst1;
+      fprintf(stderr,"\t1:TEX_INST:  0x%08x: id: %d op:%s, %s, %s %s\n", inst, (inst >> 16) & 0xf,
+             to_texop((inst >> 22) & 0x7), (inst & (1<<25)) ? "ACQ" : "",
+             (inst & (1<<26)) ? "IGNUNC" : "", (inst & (1<<27)) ? "UNSCALED" : "SCALED");
+      inst = code->inst[n].inst2;
+      fprintf(stderr,"\t2:TEX_ADDR:  0x%08x: src: %d%s %s/%s/%s/%s dst: %d%s %s/%s/%s/%s\n", inst,
+             inst & 127, inst & (1<<7) ? "(rel)" : "",
+             toswiz((inst >> 8) & 0x3), toswiz((inst >> 10) & 0x3),
+             toswiz((inst >> 12) & 0x3), toswiz((inst >> 14) & 0x3),
+             (inst >> 16) & 127, inst & (1<<23) ? "(rel)" : "",
+             toswiz((inst >> 24) & 0x3), toswiz((inst >> 26) & 0x3),
+             toswiz((inst >> 28) & 0x3), toswiz((inst >> 30) & 0x3));
+
+      fprintf(stderr,"\t3:TEX_DXDY:  0x%08x\n", code->inst[n].inst3);
+      break;
+    }
+    fprintf(stderr,"\n");
+  }
+
+}
diff --git a/src/gallium/drivers/r300/compiler/r500_fragprog.h b/src/gallium/drivers/r300/compiler/r500_fragprog.h
new file mode 100644 (file)
index 0000000..6aa448c
--- /dev/null
@@ -0,0 +1,50 @@
+/*
+ * Copyright (C) 2005 Ben Skeggs.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/*
+ * Authors:
+ *   Ben Skeggs <darktama@iinet.net.au>
+ *   Jerome Glisse <j.glisse@gmail.com>
+ */
+#ifndef __R500_FRAGPROG_H_
+#define __R500_FRAGPROG_H_
+
+#include "radeon_compiler.h"
+#include "radeon_swizzle.h"
+
+/* Compiler pass entry point; the definition is not part of this header. */
+extern void r500BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user);
+
+/* Print the compiled R500 fragment program to stderr. */
+extern void r500FragmentProgramDump(struct radeon_compiler *c, void *user);
+
+/* Native-swizzle test and source-split callbacks for R500. */
+extern struct rc_swizzle_caps r500_swizzle_caps;
+
+/* Rewrite an IF instruction to read the ALU result special register.
+ * Returns 0 if inst_if is not an IF instruction, 1 otherwise. */
+extern int r500_transform_IF(
+       struct radeon_compiler * c,
+       struct rc_instruction * inst_if,
+       void* data);
+
+#endif
diff --git a/src/gallium/drivers/r300/compiler/r500_fragprog_emit.c b/src/gallium/drivers/r300/compiler/r500_fragprog_emit.c
new file mode 100644 (file)
index 0000000..c30cd75
--- /dev/null
@@ -0,0 +1,678 @@
+/*
+ * Copyright (C) 2005 Ben Skeggs.
+ *
+ * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
+ * Adaptation and modification for ATI/AMD Radeon R500 GPU chipsets.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/**
+ * \file
+ *
+ * \author Ben Skeggs <darktama@iinet.net.au>
+ *
+ * \author Jerome Glisse <j.glisse@gmail.com>
+ *
+ * \author Corbin Simpson <MostAwesomeDude@gmail.com>
+ *
+ */
+
+#include "r500_fragprog.h"
+
+#include "../r300_reg.h"
+
+#include "radeon_program_pair.h"
+
+/* Declare a local `code` pointing at the R500 part of the program code.
+ * Requires a variable `c` (the fragment program compiler) in scope. */
+#define PROG_CODE \
+       struct r500_fragment_program_code *code = &c->code->code.r500
+
+/* Report a compile error through rc_error(), prefixed with the current
+ * file and function name.  Requires `c` in scope.  A "\n" is appended to
+ * fmt, so callers need not supply one themselves. */
+#define error(fmt, args...) do {                       \
+               rc_error(&c->Base, "%s::%s(): " fmt "\n",       \
+                       __FILE__, __FUNCTION__, ##args);        \
+       } while(0)
+
+
+/* Bookkeeping for one IF / ELSE / ENDIF construct.
+ * NOTE(review): the fields presumably hold the instruction indices of the
+ * respective flow control instructions; their use is outside this excerpt,
+ * confirm there. */
+struct branch_info {
+       int If;
+       int Else;
+       int Endif;
+};
+
+/* Bookkeeping for one loop.
+ * NOTE(review): Brks/Conts look like growable arrays (pointer, element
+ * count, reserved capacity) for the loop's break and continue
+ * instructions; the code using them is outside this excerpt, confirm
+ * there. */
+struct r500_loop_info {
+       int BgnLoop;
+
+       int BranchDepth;
+       int * Brks;
+       int BrkCount;
+       int BrkReserved;
+
+       int * Conts;
+       int ContCount;
+       int ContReserved;
+};
+
+/* State carried while emitting a program: the compiler, the code being
+ * written, and the stacks of open branches and loops.
+ * NOTE(review): the *Reserved fields presumably hold the allocated
+ * capacity of the Branches/Loops arrays; the code using them is outside
+ * this excerpt, confirm there. */
+struct emit_state {
+       struct radeon_compiler * C;
+       struct r500_fragment_program_code * Code;
+
+       struct branch_info * Branches;
+       unsigned int CurrentBranchDepth;
+       unsigned int BranchesReserved;
+
+       struct r500_loop_info * Loops;
+       unsigned int CurrentLoopDepth;
+       unsigned int LoopsReserved;
+
+       unsigned int MaxBranchDepth;
+
+};
+
+/**
+ * Map a compiler opcode to the R500 RGB ALU opcode.
+ *
+ * An opcode without an RGB equivalent is reported as an error and
+ * translated like MAD, the same as NOP.
+ */
+static unsigned int translate_rgb_op(struct r300_fragment_program_compiler *c, rc_opcode opcode)
+{
+       switch(opcode) {
+       case RC_OPCODE_NOP:
+       case RC_OPCODE_MAD:
+               return R500_ALU_RGBA_OP_MAD;
+       case RC_OPCODE_CMP:
+               return R500_ALU_RGBA_OP_CMP;
+       case RC_OPCODE_CND:
+               return R500_ALU_RGBA_OP_CND;
+       case RC_OPCODE_DDX:
+               return R500_ALU_RGBA_OP_MDH;
+       case RC_OPCODE_DDY:
+               return R500_ALU_RGBA_OP_MDV;
+       case RC_OPCODE_DP3:
+               return R500_ALU_RGBA_OP_DP3;
+       case RC_OPCODE_DP4:
+               return R500_ALU_RGBA_OP_DP4;
+       case RC_OPCODE_FRC:
+               return R500_ALU_RGBA_OP_FRC;
+       case RC_OPCODE_MAX:
+               return R500_ALU_RGBA_OP_MAX;
+       case RC_OPCODE_MIN:
+               return R500_ALU_RGBA_OP_MIN;
+       case RC_OPCODE_REPL_ALPHA:
+               return R500_ALU_RGBA_OP_SOP;
+       default:
+               break;
+       }
+
+       /* Unknown opcode: report it, then emit a MAD. */
+       error("translate_rgb_op: unknown opcode %s\n", rc_get_opcode_info(opcode)->Name);
+       return R500_ALU_RGBA_OP_MAD;
+}
+
+/**
+ * Map a compiler opcode to the R500 alpha ALU opcode.
+ *
+ * DP3 and DP4 both map to the single alpha DP opcode.  An opcode without
+ * an alpha equivalent is reported as an error and translated like MAD,
+ * the same as NOP.
+ */
+static unsigned int translate_alpha_op(struct r300_fragment_program_compiler *c, rc_opcode opcode)
+{
+       switch(opcode) {
+       case RC_OPCODE_NOP:
+       case RC_OPCODE_MAD:
+               return R500_ALPHA_OP_MAD;
+       case RC_OPCODE_CMP:
+               return R500_ALPHA_OP_CMP;
+       case RC_OPCODE_CND:
+               return R500_ALPHA_OP_CND;
+       case RC_OPCODE_COS:
+               return R500_ALPHA_OP_COS;
+       case RC_OPCODE_DDX:
+               return R500_ALPHA_OP_MDH;
+       case RC_OPCODE_DDY:
+               return R500_ALPHA_OP_MDV;
+       case RC_OPCODE_DP3:
+       case RC_OPCODE_DP4:
+               return R500_ALPHA_OP_DP;
+       case RC_OPCODE_EX2:
+               return R500_ALPHA_OP_EX2;
+       case RC_OPCODE_FRC:
+               return R500_ALPHA_OP_FRC;
+       case RC_OPCODE_LG2:
+               return R500_ALPHA_OP_LN2;
+       case RC_OPCODE_MAX:
+               return R500_ALPHA_OP_MAX;
+       case RC_OPCODE_MIN:
+               return R500_ALPHA_OP_MIN;
+       case RC_OPCODE_RCP:
+               return R500_ALPHA_OP_RCP;
+       case RC_OPCODE_RSQ:
+               return R500_ALPHA_OP_RSQ;
+       case RC_OPCODE_SIN:
+               return R500_ALPHA_OP_SIN;
+       default:
+               break;
+       }
+
+       /* Unknown opcode: report it, then emit a MAD. */
+       error("translate_alpha_op: unknown opcode %s\n", rc_get_opcode_info(opcode)->Name);
+       return R500_ALPHA_OP_MAD;
+}
+
+/**
+ * Convert a compiler swizzle selector to the value written to hardware.
+ *
+ * ZERO and UNUSED become 4, HALF becomes 5 and ONE becomes 6; every other
+ * selector is passed through unchanged.
+ */
+static unsigned int fix_hw_swizzle(unsigned int swz)
+{
+       switch (swz) {
+       case RC_SWIZZLE_ZERO:
+       case RC_SWIZZLE_UNUSED:
+               return 4;
+       case RC_SWIZZLE_HALF:
+               return 5;
+       case RC_SWIZZLE_ONE:
+               return 6;
+       default:
+               return swz;
+       }
+}
+
+/**
+ * Build the hardware encoding of RGB argument `arg`: the source selector
+ * in the low bits, three 3-bit swizzle selectors starting at bit 2,
+ * negate at bit 11 and abs at bit 12.
+ */
+static unsigned int translate_arg_rgb(struct rc_pair_instruction *inst, int arg)
+{
+       unsigned int swizzle_bits = 0;
+       int chan;
+
+       for (chan = 0; chan < 3; ++chan) {
+               unsigned int sel =
+                       fix_hw_swizzle(GET_SWZ(inst->RGB.Arg[arg].Swizzle, chan));
+               swizzle_bits |= sel << (3*chan + 2);
+       }
+
+       return inst->RGB.Arg[arg].Source |
+              (inst->RGB.Arg[arg].Negate << 11) |
+              (inst->RGB.Arg[arg].Abs << 12) |
+              swizzle_bits;
+}
+
+/**
+ * Build the hardware encoding of alpha argument `i`: the source selector
+ * in the low bits, one swizzle selector at bit 2, negate at bit 5 and abs
+ * at bit 6.
+ */
+static unsigned int translate_arg_alpha(struct rc_pair_instruction *inst, int i)
+{
+       unsigned int sel = fix_hw_swizzle(GET_SWZ(inst->Alpha.Arg[i].Swizzle, 0));
+
+       return inst->Alpha.Arg[i].Source |
+              (sel << 2) |
+              (inst->Alpha.Arg[i].Negate << 5) |
+              (inst->Alpha.Arg[i].Abs << 6);
+}
+
+/**
+ * Map a compare function to the ALU result operation bits of inst0.
+ *
+ * Only EQUAL, LESS, GEQUAL and NOTEQUAL are handled; any other function
+ * is reported through rc_error() and yields 0.
+ */
+static uint32_t translate_alu_result_op(struct r300_fragment_program_compiler * c, rc_compare_func func)
+{
+       uint32_t hw_op = 0;
+
+       switch(func) {
+       case RC_COMPARE_FUNC_EQUAL:
+               hw_op = R500_INST_ALU_RESULT_OP_EQ;
+               break;
+       case RC_COMPARE_FUNC_LESS:
+               hw_op = R500_INST_ALU_RESULT_OP_LT;
+               break;
+       case RC_COMPARE_FUNC_GEQUAL:
+               hw_op = R500_INST_ALU_RESULT_OP_GE;
+               break;
+       case RC_COMPARE_FUNC_NOTEQUAL:
+               hw_op = R500_INST_ALU_RESULT_OP_NE;
+               break;
+       default:
+               rc_error(&c->Base, "%s: unsupported compare func %i\n", __FUNCTION__, func);
+               break;
+       }
+
+       return hw_op;
+}
+
+/* Record that temporary `index` is used by raising code->max_temp_idx to
+ * it when it is larger than the current maximum. */
+static void use_temporary(struct r500_fragment_program_code* code, unsigned int index)
+{
+       if (index <= code->max_temp_idx)
+               return;
+
+       code->max_temp_idx = index;
+}
+
+/**
+ * Compute the address field for one instruction source.
+ *
+ * Constants get the CONST flag, temporaries and inputs are recorded via
+ * use_temporary() and returned as their plain index.  Any other register
+ * file yields 0.
+ */
+static unsigned int use_source(struct r500_fragment_program_code* code, struct rc_pair_instruction_source src)
+{
+       /* From docs:
+        *   Note that inline constants set the MSB of ADDR0 and clear ADDR0_CONST.
+        * MSB = 1 << 7 */
+       if (!src.Used)
+               return 1 << 7;
+
+       switch (src.File) {
+       case RC_FILE_CONSTANT:
+               return src.Index | R500_RGB_ADDR0_CONST;
+       case RC_FILE_TEMPORARY:
+       case RC_FILE_INPUT:
+               use_temporary(code, src.Index);
+               return src.Index;
+       default:
+               return 0;
+       }
+}
+
+/**
+ * Set the NOP flag on instruction `ip`, unless that instruction is a
+ * texture lookup.
+ */
+static void alu_nop(struct r300_fragment_program_compiler *c, int ip)
+{
+       struct r500_fragment_program_code *code = &c->code->code.r500;
+
+       /* The low two bits of inst0 hold the instruction type. */
+       if ((code->inst[ip].inst0 & 0x3) == R500_INST_TYPE_TEX)
+               return;
+
+       code->inst[ip].inst0 |= R500_INST_NOP;
+}
+
+/**
+ * Emit a paired ALU instruction.
+ *
+ * Claims the next instruction slot (++code->inst_end) and encodes the RGB
+ * and alpha halves of \p inst into the slot's inst0..inst5 words.
+ *
+ * inst1, inst2 and inst3 are only OR-ed into, never assigned, so the slot
+ * must be zero on entry; r500BuildFragmentProgramHwCode memsets the whole
+ * code structure before emission starts.
+ *
+ * Errors are reported through error() when the instruction limit
+ * (c->Base.max_alu_insts) is reached, or when the instruction writes an
+ * output and the ALU result at the same time. In the latter case the slot
+ * has already been claimed and is left partially filled.
+ */
+static void emit_paired(struct r300_fragment_program_compiler *c, struct rc_pair_instruction *inst)
+{
+       int ip;
+       PROG_CODE;
+
+       /* inst_end is the index of the last emitted instruction (-1 if none). */
+       if (code->inst_end >= c->Base.max_alu_insts-1) {
+               error("emit_alu: Too many instructions");
+               return;
+       }
+
+       ip = ++code->inst_end;
+
+       /* Quirk: MDH/MDV (DDX/DDY) need a NOP on previous non-TEX instructions. */
+       if (inst->RGB.Opcode == RC_OPCODE_DDX || inst->Alpha.Opcode == RC_OPCODE_DDX ||
+               inst->RGB.Opcode == RC_OPCODE_DDY || inst->Alpha.Opcode == RC_OPCODE_DDY) {
+               if (ip > 0) {
+                       alu_nop(c, ip - 1);
+               }
+       }
+
+       /* The opcode translations start inst5 (RGB) and inst4 (alpha);
+        * everything else for these two words is OR-ed in below. */
+       code->inst[ip].inst5 = translate_rgb_op(c, inst->RGB.Opcode);
+       code->inst[ip].inst4 = translate_alpha_op(c, inst->Alpha.Opcode);
+
+       /* Any color or depth output write makes this an OUT instruction;
+        * otherwise it is a plain ALU instruction. */
+       if (inst->RGB.OutputWriteMask || inst->Alpha.OutputWriteMask || inst->Alpha.DepthWriteMask) {
+               code->inst[ip].inst0 = R500_INST_TYPE_OUT;
+               if (inst->WriteALUResult) {
+                       error("Cannot write output and ALU result at the same time");
+                       return;
+               }
+       } else {
+               code->inst[ip].inst0 = R500_INST_TYPE_ALU;
+       }
+       code->inst[ip].inst0 |= R500_INST_TEX_SEM_WAIT;
+
+       /* Write masks: RGB register mask at bit 11, alpha register write at
+        * bit 14, RGB output mask at bit 15, alpha output mask at bit 18. */
+       code->inst[ip].inst0 |= (inst->RGB.WriteMask << 11);
+       code->inst[ip].inst0 |= inst->Alpha.WriteMask ? 1 << 14 : 0;
+       code->inst[ip].inst0 |= (inst->RGB.OutputWriteMask << 15) | (inst->Alpha.OutputWriteMask << 18);
+       if (inst->Nop) {
+               code->inst[ip].inst0 |= R500_INST_NOP;
+       }
+       if (inst->Alpha.DepthWriteMask) {
+               code->inst[ip].inst4 |= R500_ALPHA_W_OMASK;
+               /* Recorded on the shared program code so users of the compiled
+                * program can tell that it writes depth. */
+               c->code->writes_depth = 1;
+       }
+
+       /* Destination registers; use_temporary() is told about each index. */
+       code->inst[ip].inst4 |= R500_ALPHA_ADDRD(inst->Alpha.DestIndex);
+       code->inst[ip].inst5 |= R500_ALU_RGBA_ADDRD(inst->RGB.DestIndex);
+       use_temporary(code, inst->Alpha.DestIndex);
+       use_temporary(code, inst->RGB.DestIndex);
+
+       if (inst->RGB.Saturate)
+               code->inst[ip].inst0 |= R500_INST_RGB_CLAMP;
+       if (inst->Alpha.Saturate)
+               code->inst[ip].inst0 |= R500_INST_ALPHA_CLAMP;
+
+       /* Set the presubtract operation. */
+       switch(inst->RGB.Src[RC_PAIR_PRESUB_SRC].Index) {
+               case RC_PRESUB_BIAS:
+                       code->inst[ip].inst1 |= R500_RGB_SRCP_OP_1_MINUS_2RGB0;
+                       break;
+               case RC_PRESUB_SUB:
+                       code->inst[ip].inst1 |= R500_RGB_SRCP_OP_RGB1_MINUS_RGB0;
+                       break;
+               case RC_PRESUB_ADD:
+                       code->inst[ip].inst1 |= R500_RGB_SRCP_OP_RGB1_PLUS_RGB0;
+                       break;
+               case RC_PRESUB_INV:
+                       code->inst[ip].inst1 |= R500_RGB_SRCP_OP_1_MINUS_RGB0;
+                       break;
+               default:
+                       break;
+       }
+       /* Same selection for the alpha half, stored in inst2. */
+       switch(inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Index) {
+               case RC_PRESUB_BIAS:
+                       code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_1_MINUS_2A0;
+                       break;
+               case RC_PRESUB_SUB:
+                       code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_A1_MINUS_A0;
+                       break;
+               case RC_PRESUB_ADD:
+                       code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_A1_PLUS_A0;
+                       break;
+               case RC_PRESUB_INV:
+                       code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_1_MINUS_A0;
+                       break;
+               default:
+                       break;
+       }
+
+       /* Source addresses: three RGB sources in inst1, three alpha sources
+        * in inst2. */
+       code->inst[ip].inst1 |= R500_RGB_ADDR0(use_source(code, inst->RGB.Src[0]));
+       code->inst[ip].inst1 |= R500_RGB_ADDR1(use_source(code, inst->RGB.Src[1]));
+       code->inst[ip].inst1 |= R500_RGB_ADDR2(use_source(code, inst->RGB.Src[2]));
+
+       code->inst[ip].inst2 |= R500_ALPHA_ADDR0(use_source(code, inst->Alpha.Src[0]));
+       code->inst[ip].inst2 |= R500_ALPHA_ADDR1(use_source(code, inst->Alpha.Src[1]));
+       code->inst[ip].inst2 |= R500_ALPHA_ADDR2(use_source(code, inst->Alpha.Src[2]));
+
+       /* Argument selects: A and B go into inst3 (RGB) / inst4 (alpha),
+        * argument C of both halves goes into inst5. */
+       code->inst[ip].inst3 |= translate_arg_rgb(inst, 0) << R500_ALU_RGB_SEL_A_SHIFT;
+       code->inst[ip].inst3 |= translate_arg_rgb(inst, 1) << R500_ALU_RGB_SEL_B_SHIFT;
+       code->inst[ip].inst5 |= translate_arg_rgb(inst, 2) << R500_ALU_RGBA_SEL_C_SHIFT;
+
+       code->inst[ip].inst4 |= translate_arg_alpha(inst, 0) << R500_ALPHA_SEL_A_SHIFT;
+       code->inst[ip].inst4 |= translate_arg_alpha(inst, 1) << R500_ALPHA_SEL_B_SHIFT;
+       code->inst[ip].inst5 |= translate_arg_alpha(inst, 2) << R500_ALU_RGBA_ALPHA_SEL_C_SHIFT;
+
+       code->inst[ip].inst3 |= R500_ALU_RGB_TARGET(inst->RGB.Target);
+       code->inst[ip].inst4 |= R500_ALPHA_TARGET(inst->Alpha.Target);
+
+       /* Optional ALU result write: RC_ALURESULT_X selects the red channel,
+        * any other non-zero value selects alpha. */
+       if (inst->WriteALUResult) {
+               code->inst[ip].inst3 |= R500_ALU_RGB_WMASK;
+
+               if (inst->WriteALUResult == RC_ALURESULT_X)
+                       code->inst[ip].inst0 |= R500_INST_ALU_RESULT_SEL_RED;
+               else
+                       code->inst[ip].inst0 |= R500_INST_ALU_RESULT_SEL_ALPHA;
+
+               code->inst[ip].inst0 |= translate_alu_result_op(c, inst->ALUResultCompare);
+       }
+}
+
+/**
+ * Repack a swizzle for the STRQ fields of a TEX instruction.
+ *
+ * For each of the four components, the low two bits of the value returned
+ * by GET_SWZ are kept and stored two bits per component, with component 0
+ * in the lowest bits of the result.
+ */
+static unsigned int translate_strq_swizzle(unsigned int swizzle)
+{
+       unsigned int packed = 0;
+       unsigned int chan;
+
+       for (chan = 0; chan < 4; ++chan) {
+               const unsigned int sel = GET_SWZ(swizzle, chan) & 0x3;
+
+               packed |= sel << (2 * chan);
+       }
+
+       return packed;
+}
+
+/**
+ * Emit a single TEX instruction
+ *
+ * Claims the next instruction slot (++code->inst_end) and encodes \p inst
+ * into it: instruction type, write mask and semaphore wait in inst0, the
+ * texture unit and operation in inst1, source/destination addresses and
+ * swizzles in inst2 and, for TXD only, the derivative registers in inst3.
+ *
+ * \return 1 on success; 0 if an error was reported, either because the
+ *         instruction limit was reached or because the opcode is not a
+ *         texture opcode this function knows how to encode
+ */
+static int emit_tex(struct r300_fragment_program_compiler *c, struct rc_sub_instruction *inst)
+{
+       int ip;
+       PROG_CODE;
+
+       if (code->inst_end >= c->Base.max_alu_insts-1) {
+               error("emit_tex: Too many instructions");
+               return 0;
+       }
+
+       ip = ++code->inst_end;
+
+       code->inst[ip].inst0 = R500_INST_TYPE_TEX
+               | (inst->DstReg.WriteMask << 11)
+               | R500_INST_TEX_SEM_WAIT;
+       code->inst[ip].inst1 = R500_TEX_ID(inst->TexSrcUnit)
+               | R500_TEX_SEM_ACQUIRE;
+
+       if (inst->TexSrcTarget == RC_TEXTURE_RECT)
+               code->inst[ip].inst1 |= R500_TEX_UNSCALED;
+
+       switch (inst->Opcode) {
+       case RC_OPCODE_KIL:
+               code->inst[ip].inst1 |= R500_TEX_INST_TEXKILL;
+               break;
+       case RC_OPCODE_TEX:
+               code->inst[ip].inst1 |= R500_TEX_INST_LD;
+               break;
+       case RC_OPCODE_TXB:
+               code->inst[ip].inst1 |= R500_TEX_INST_LODBIAS;
+               break;
+       case RC_OPCODE_TXP:
+               code->inst[ip].inst1 |= R500_TEX_INST_PROJ;
+               break;
+       case RC_OPCODE_TXD:
+               code->inst[ip].inst1 |= R500_TEX_INST_DXDY;
+               break;
+       case RC_OPCODE_TXL:
+               code->inst[ip].inst1 |= R500_TEX_INST_LOD;
+               break;
+       default:
+               error("emit_tex can't handle opcode %s\n", rc_get_opcode_info(inst->Opcode)->Name);
+               /* Report failure like the instruction-limit path above does,
+                * instead of falling through and returning success. */
+               return 0;
+       }
+
+       /* KIL has no destination register to account for. */
+       use_temporary(code, inst->SrcReg[0].Index);
+       if (inst->Opcode != RC_OPCODE_KIL)
+               use_temporary(code, inst->DstReg.Index);
+
+       code->inst[ip].inst2 = R500_TEX_SRC_ADDR(inst->SrcReg[0].Index)
+               | (translate_strq_swizzle(inst->SrcReg[0].Swizzle) << 8)
+               | R500_TEX_DST_ADDR(inst->DstReg.Index)
+               | (GET_SWZ(inst->TexSwizzle, 0) << 24)
+               | (GET_SWZ(inst->TexSwizzle, 1) << 26)
+               | (GET_SWZ(inst->TexSwizzle, 2) << 28)
+               | (GET_SWZ(inst->TexSwizzle, 3) << 30)
+               ;
+
+       if (inst->Opcode == RC_OPCODE_TXD) {
+               use_temporary(code, inst->SrcReg[1].Index);
+               use_temporary(code, inst->SrcReg[2].Index);
+
+               /* DX and DY parameters are specified in a separate register. */
+               code->inst[ip].inst3 =
+                       R500_DX_ADDR(inst->SrcReg[1].Index) |
+                       (translate_strq_swizzle(inst->SrcReg[1].Swizzle) << 8) |
+                       R500_DY_ADDR(inst->SrcReg[2].Index) |
+                       (translate_strq_swizzle(inst->SrcReg[2].Swizzle) << 24);
+       }
+
+       return 1;
+}
+
+/**
+ * Emit one flow control instruction.
+ *
+ * Claims the next instruction slot (++s->Code->inst_end), marks it as a
+ * flow control instruction and then handles \p inst by opcode:
+ *
+ *  - BGNLOOP pushes a loop record; BRK and CONT register themselves with
+ *    the innermost loop; ENDLOOP fills in the jump addresses of the
+ *    matching BGNLOOP and of all registered BRK/CONT, then pops the loop.
+ *  - IF pushes a branch record; ELSE records its position; ENDIF encodes
+ *    the IF, the optional ELSE and the ENDIF itself, then pops the branch.
+ *
+ * Problems (instruction limit, branch depth limit, ELSE/ENDIF outside a
+ * branch, BRK/CONT/ENDLOOP outside a loop, unknown opcode) are reported
+ * with rc_error(). The instruction slot has already been claimed by then,
+ * except for the instruction-limit case.
+ */
+static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst)
+{
+       struct branch_info * branch;
+       struct r500_loop_info * loop;
+       unsigned int newip;
+
+       if (s->Code->inst_end >= s->C->max_alu_insts-1) {
+               rc_error(s->C, "%s: Too many instructions", __FUNCTION__);
+               return;
+       }
+
+       newip = ++s->Code->inst_end;
+
+       /* Currently all loops use the same integer constant to initialize
+        * the loop variables. */
+       if(!s->Code->int_constants[0]) {
+               s->Code->int_constants[0] = R500_FC_INT_CONST_KR(0xff);
+               s->Code->int_constant_count = 1;
+       }
+       s->Code->inst[newip].inst0 = R500_INST_TYPE_FC | R500_INST_ALU_WAIT;
+
+       switch(inst->U.I.Opcode){
+       case RC_OPCODE_BGNLOOP:
+               memory_pool_array_reserve(&s->C->Pool, struct r500_loop_info,
+                       s->Loops, s->CurrentLoopDepth, s->LoopsReserved, 1);
+
+               loop = &s->Loops[s->CurrentLoopDepth++];
+               memset(loop, 0, sizeof(struct r500_loop_info));
+               loop->BranchDepth = s->CurrentBranchDepth;
+               loop->BgnLoop = newip;
+
+               /* The jump address (inst3) is filled in at ENDLOOP time. */
+               s->Code->inst[newip].inst2 = R500_FC_OP_LOOP
+                       | R500_FC_JUMP_FUNC(0x00)
+                       | R500_FC_IGNORE_UNCOVERED
+                       ;
+               break;
+       case RC_OPCODE_BRK:
+               /* Without this check, Loops[CurrentLoopDepth - 1] would be
+                * indexed out of bounds when no loop is open. */
+               if (!s->CurrentLoopDepth) {
+                       rc_error(s->C, "%s: got BRK outside a loop", __FUNCTION__);
+                       return;
+               }
+
+               loop = &s->Loops[s->CurrentLoopDepth - 1];
+               memory_pool_array_reserve(&s->C->Pool, int, loop->Brks,
+                                       loop->BrkCount, loop->BrkReserved, 1);
+
+               /* Remember the position; the jump address is set at ENDLOOP. */
+               loop->Brks[loop->BrkCount++] = newip;
+               s->Code->inst[newip].inst2 = R500_FC_OP_BREAKLOOP
+                       | R500_FC_JUMP_FUNC(0xff)
+                       | R500_FC_B_OP1_DECR
+                       | R500_FC_B_POP_CNT(
+                               s->CurrentBranchDepth - loop->BranchDepth)
+                       | R500_FC_IGNORE_UNCOVERED
+                       ;
+               break;
+
+       case RC_OPCODE_CONT:
+               if (!s->CurrentLoopDepth) {
+                       rc_error(s->C, "%s: got CONT outside a loop", __FUNCTION__);
+                       return;
+               }
+
+               loop = &s->Loops[s->CurrentLoopDepth - 1];
+               memory_pool_array_reserve(&s->C->Pool, int, loop->Conts,
+                                       loop->ContCount, loop->ContReserved, 1);
+
+               /* Remember the position; the jump address is set at ENDLOOP. */
+               loop->Conts[loop->ContCount++] = newip;
+               s->Code->inst[newip].inst2 = R500_FC_OP_CONTINUE
+                       | R500_FC_JUMP_FUNC(0xff)
+                       | R500_FC_B_OP1_DECR
+                       | R500_FC_B_POP_CNT(
+                               s->CurrentBranchDepth - loop->BranchDepth)
+                       | R500_FC_IGNORE_UNCOVERED
+                       ;
+               break;
+
+       case RC_OPCODE_ENDLOOP:
+               if (!s->CurrentLoopDepth) {
+                       rc_error(s->C, "%s: got ENDLOOP outside a loop", __FUNCTION__);
+                       return;
+               }
+
+               loop = &s->Loops[s->CurrentLoopDepth - 1];
+               /* Emit ENDLOOP */
+               s->Code->inst[newip].inst2 = R500_FC_OP_ENDLOOP
+                       | R500_FC_JUMP_FUNC(0xff)
+                       | R500_FC_JUMP_ANY
+                       | R500_FC_IGNORE_UNCOVERED
+                       ;
+               /* The constant integer at index 0 is used by all loops. */
+               s->Code->inst[newip].inst3 = R500_FC_INT_ADDR(0)
+                       | R500_FC_JUMP_ADDR(loop->BgnLoop + 1)
+                       ;
+
+               /* Set jump address and int constant for BGNLOOP */
+               s->Code->inst[loop->BgnLoop].inst3 = R500_FC_INT_ADDR(0)
+                       | R500_FC_JUMP_ADDR(newip)
+                       ;
+
+               /* Set jump address for the BRK instructions. */
+               while(loop->BrkCount--) {
+                       s->Code->inst[loop->Brks[loop->BrkCount]].inst3 =
+                                               R500_FC_JUMP_ADDR(newip + 1);
+               }
+
+               /* Set jump address for CONT instructions. */
+               while(loop->ContCount--) {
+                       s->Code->inst[loop->Conts[loop->ContCount]].inst3 =
+                                               R500_FC_JUMP_ADDR(newip);
+               }
+               s->CurrentLoopDepth--;
+               break;
+
+       case RC_OPCODE_IF:
+               if ( s->CurrentBranchDepth >= R500_PFS_MAX_BRANCH_DEPTH_FULL) {
+                       rc_error(s->C, "Branch depth exceeds hardware limit");
+                       return;
+               }
+               memory_pool_array_reserve(&s->C->Pool, struct branch_info,
+                               s->Branches, s->CurrentBranchDepth, s->BranchesReserved, 1);
+
+               branch = &s->Branches[s->CurrentBranchDepth++];
+               branch->If = newip;
+               branch->Else = -1;
+               branch->Endif = -1;
+
+               if (s->CurrentBranchDepth > s->MaxBranchDepth)
+                       s->MaxBranchDepth = s->CurrentBranchDepth;
+
+               /* actual instruction is filled in at ENDIF time */
+               break;
+
+       case RC_OPCODE_ELSE:
+               if (!s->CurrentBranchDepth) {
+                       rc_error(s->C, "%s: got ELSE outside a branch", __FUNCTION__);
+                       return;
+               }
+
+               branch = &s->Branches[s->CurrentBranchDepth - 1];
+               branch->Else = newip;
+
+               /* actual instruction is filled in at ENDIF time */
+               break;
+
+       case RC_OPCODE_ENDIF:
+               if (!s->CurrentBranchDepth) {
+                       rc_error(s->C, "%s: got ENDIF outside a branch", __FUNCTION__);
+                       return;
+               }
+
+               branch = &s->Branches[s->CurrentBranchDepth - 1];
+               branch->Endif = newip;
+
+               s->Code->inst[branch->Endif].inst2 = R500_FC_OP_JUMP
+                       | R500_FC_A_OP_NONE /* no address stack */
+                       | R500_FC_JUMP_ANY /* docs says set this, but I don't understand why */
+                       | R500_FC_B_OP0_DECR /* decrement branch counter if stay */
+                       | R500_FC_B_OP1_NONE /* no branch counter if stay */
+                       | R500_FC_B_POP_CNT(1)
+                       ;
+               s->Code->inst[branch->Endif].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
+               s->Code->inst[branch->If].inst2 = R500_FC_OP_JUMP
+                       | R500_FC_A_OP_NONE /* no address stack */
+                       | R500_FC_JUMP_FUNC(0x0f) /* jump if ALU result is false */
+                       | R500_FC_B_OP0_INCR /* increment branch counter if stay */
+                       | R500_FC_IGNORE_UNCOVERED
+               ;
+
+               if (branch->Else >= 0) {
+                       /* increment branch counter also if jump */
+                       s->Code->inst[branch->If].inst2 |= R500_FC_B_OP1_INCR;
+                       s->Code->inst[branch->If].inst3 = R500_FC_JUMP_ADDR(branch->Else + 1);
+
+                       s->Code->inst[branch->Else].inst2 = R500_FC_OP_JUMP
+                               | R500_FC_A_OP_NONE /* no address stack */
+                               | R500_FC_B_ELSE /* all active pixels want to jump */
+                               | R500_FC_B_OP0_NONE /* no counter op if stay */
+                               | R500_FC_B_OP1_DECR /* decrement branch counter if jump */
+                               | R500_FC_B_POP_CNT(1)
+                       ;
+                       s->Code->inst[branch->Else].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
+               } else {
+                       /* don't touch branch counter on jump */
+                       s->Code->inst[branch->If].inst2 |= R500_FC_B_OP1_NONE;
+                       s->Code->inst[branch->If].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
+               }
+
+               s->CurrentBranchDepth--;
+               break;
+       default:
+               rc_error(s->C, "%s: unknown opcode %s\n", __FUNCTION__, rc_get_opcode_info(inst->U.I.Opcode)->Name);
+       }
+}
+
+/**
+ * Translate the compiler's instruction list into R500 fragment program
+ * hardware code, stored in compiler->code->code.r500.
+ *
+ * The code structure is cleared first. Each instruction is then handed to
+ * emit_paired (paired instructions), emit_flowcontrol (flow control
+ * opcodes) or emit_tex (all other normal instructions); BEGIN_TEX emits
+ * nothing. Emission stops as soon as the compiler's Error flag is set.
+ *
+ * If the program is empty or does not end in an OUT instruction, an empty
+ * OUT instruction is appended. Full flow control mode is enabled when
+ * loops are used or branches nest at least four deep.
+ *
+ * \param c     compiler; must actually be an r300_fragment_program_compiler
+ * \param user  unused
+ */
+void r500BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user)
+{
+       struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)c;
+       struct r500_fragment_program_code *code = &compiler->code->code.r500;
+       struct rc_instruction *inst;
+       struct emit_state s;
+       int needs_fake_out;
+
+       /* Start from a clean slate; inst_end == -1 means "no instructions". */
+       memset(code, 0, sizeof(*code));
+       code->inst_end = -1;
+       code->max_temp_idx = 1;
+
+       memset(&s, 0, sizeof(s));
+       s.Code = code;
+       s.C = &compiler->Base;
+
+       inst = compiler->Base.Program.Instructions.Next;
+       while (inst != &compiler->Base.Program.Instructions && !compiler->Base.Error) {
+               if (inst->Type != RC_INSTRUCTION_NORMAL) {
+                       emit_paired(compiler, &inst->U.P);
+               } else if (rc_get_opcode_info(inst->U.I.Opcode)->IsFlowControl) {
+                       emit_flowcontrol(&s, inst);
+               } else if (inst->U.I.Opcode != RC_OPCODE_BEGIN_TEX) {
+                       emit_tex(compiler, &inst->U.I);
+               }
+
+               inst = inst->Next;
+       }
+
+       if (code->max_temp_idx >= compiler->Base.max_temp_regs)
+               rc_error(&compiler->Base, "Too many hardware temporaries used");
+
+       if (compiler->Base.Error)
+               return;
+
+       /* Only look at the last instruction if there is one. */
+       needs_fake_out = (code->inst_end == -1);
+       if (!needs_fake_out) {
+               needs_fake_out =
+                       (code->inst[code->inst_end].inst0 & R500_INST_TYPE_MASK) != R500_INST_TYPE_OUT;
+       }
+
+       if (needs_fake_out) {
+               int ip;
+
+               /* This may happen when dead-code elimination is disabled or
+                * when most of the fragment program logic is leading to a KIL */
+               if (code->inst_end >= compiler->Base.max_alu_insts-1) {
+                       rc_error(&compiler->Base, "Introducing fake OUT: Too many instructions");
+                       return;
+               }
+
+               ip = ++code->inst_end;
+               code->inst[ip].inst0 = R500_INST_TYPE_OUT | R500_INST_TEX_SEM_WAIT;
+       }
+
+       /* Enable full flow control mode if we are using loops or have if
+        * statements nested at least four deep. */
+       if (s.LoopsReserved > 0 || s.MaxBranchDepth >= 4) {
+               if (code->max_temp_idx < 1)
+                       code->max_temp_idx = 1;
+
+               code->us_fc_ctrl |= R500_FC_FULL_FC_EN;
+       }
+}
diff --git a/src/gallium/drivers/r300/compiler/radeon_code.c b/src/gallium/drivers/r300/compiler/radeon_code.c
new file mode 100644 (file)
index 0000000..6842fb8
--- /dev/null
@@ -0,0 +1,187 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_code.h"
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "radeon_program.h"
+
+/**
+ * Reset a constant list to the empty state: all fields are zeroed, so the
+ * list has no storage, no entries and no reserved capacity.
+ */
+void rc_constants_init(struct rc_constant_list * c)
+{
+       memset(c, 0, sizeof *c);
+}
+
+/**
+ * Copy a constants structure, assuming that the destination structure
+ * is not initialized.
+ *
+ * \p dst receives its own heap copy of the entries of \p src, with a
+ * capacity of exactly src->Count. An empty source yields an empty list
+ * with no storage. Release the copy with rc_constants_destroy().
+ *
+ * This function has no way to report failure, so running out of memory
+ * terminates the process with a message instead of writing through a
+ * NULL pointer.
+ */
+void rc_constants_copy(struct rc_constant_list * dst, struct rc_constant_list * src)
+{
+       memset(dst, 0, sizeof(*dst));
+
+       /* Nothing to duplicate. This also avoids malloc(0) and calling
+        * memcpy() with a source pointer that may be NULL. */
+       if (!src->Count)
+               return;
+
+       dst->Constants = malloc(sizeof(struct rc_constant) * src->Count);
+       if (!dst->Constants) {
+               fprintf(stderr, "rc_constants_copy: out of memory\n");
+               abort();
+       }
+
+       memcpy(dst->Constants, src->Constants, sizeof(struct rc_constant) * src->Count);
+       dst->Count = src->Count;
+       dst->_Reserved = src->Count;
+}
+
+/**
+ * Release the storage owned by a constant list and leave the list in the
+ * same all-zero state that rc_constants_init() produces.
+ */
+void rc_constants_destroy(struct rc_constant_list * c)
+{
+       free(c->Constants);
+       rc_constants_init(c);
+}
+
+/**
+ * Append a copy of \p constant to the list and return its index.
+ *
+ * The backing array grows geometrically: the first allocation holds 16
+ * entries and each later growth doubles the capacity.
+ *
+ * This function has no way to report failure, so running out of memory
+ * terminates the process with a message instead of writing through a
+ * NULL pointer. The list itself is only updated after the allocation has
+ * succeeded.
+ */
+unsigned rc_constants_add(struct rc_constant_list * c, struct rc_constant * constant)
+{
+       /* Copy first: growing the array may move it, and the caller is
+        * allowed to pass a pointer to an entry of this very list. */
+       struct rc_constant value = *constant;
+       unsigned index = c->Count;
+
+       if (c->Count >= c->_Reserved) {
+               unsigned new_reserved = c->_Reserved ? c->_Reserved * 2 : 16;
+               struct rc_constant * newlist;
+
+               /* realloc(NULL, n) behaves like malloc(n), so the initially
+                * empty list needs no special case and nothing is copied
+                * from a NULL pointer. */
+               newlist = realloc(c->Constants, sizeof(struct rc_constant) * new_reserved);
+               if (!newlist) {
+                       fprintf(stderr, "rc_constants_add: out of memory\n");
+                       abort();
+               }
+
+               c->Constants = newlist;
+               c->_Reserved = new_reserved;
+       }
+
+       c->Constants[index] = value;
+       c->Count++;
+
+       return index;
+}
+
+
+/**
+ * Add a state vector to the constant list, while trying to avoid duplicates.
+ *
+ * If a state constant with the same (state0, state1) pair already exists,
+ * its index is returned; otherwise a new four-component state constant is
+ * appended and the index of the new entry is returned.
+ */
+unsigned rc_constants_add_state(struct rc_constant_list * c, unsigned state0, unsigned state1)
+{
+       struct rc_constant entry;
+       unsigned i;
+
+       for (i = 0; i < c->Count; i++) {
+               const struct rc_constant * cur = &c->Constants[i];
+
+               if (cur->Type != RC_CONSTANT_STATE)
+                       continue;
+               if (cur->u.State[0] != state0 || cur->u.State[1] != state1)
+                       continue;
+
+               return i;
+       }
+
+       memset(&entry, 0, sizeof(entry));
+       entry.Type = RC_CONSTANT_STATE;
+       entry.Size = 4;
+       entry.u.State[0] = state0;
+       entry.u.State[1] = state1;
+
+       return rc_constants_add(c, &entry);
+}
+
+
+/**
+ * Add an immediate vector to the constant list, while trying to avoid
+ * duplicates.
+ *
+ * \p data points to four floats. An existing immediate is reused only if
+ * all four values match byte for byte (memcmp); otherwise a new
+ * four-component immediate is appended. Returns the index of the entry.
+ */
+unsigned rc_constants_add_immediate_vec4(struct rc_constant_list * c, const float * data)
+{
+       struct rc_constant entry;
+       unsigned i;
+
+       for (i = 0; i < c->Count; i++) {
+               const struct rc_constant * cur = &c->Constants[i];
+
+               if (cur->Type != RC_CONSTANT_IMMEDIATE)
+                       continue;
+               if (memcmp(cur->u.Immediate, data, sizeof(float)*4) == 0)
+                       return i;
+       }
+
+       memset(&entry, 0, sizeof(entry));
+       entry.Type = RC_CONSTANT_IMMEDIATE;
+       entry.Size = 4;
+       memcpy(entry.u.Immediate, data, sizeof(float) * 4);
+
+       return rc_constants_add(c, &entry);
+}
+
+
+/**
+ * Add an immediate scalar to the constant list, while trying to avoid
+ * duplicates.
+ *
+ * Lookup order:
+ *  1. an immediate that already holds \p data in one of its used
+ *     components is reused;
+ *  2. otherwise the value is stored in the next unused component of the
+ *     last immediate that has fewer than four components in use;
+ *  3. otherwise a new one-component immediate is appended.
+ *
+ * \param swizzle  receives a swizzle that replicates the component holding
+ *                 the value
+ * \return index of the constant that holds the value
+ */
+unsigned rc_constants_add_immediate_scalar(struct rc_constant_list * c, float data, unsigned * swizzle)
+{
+       struct rc_constant fresh;
+       int spare = -1;
+       unsigned i;
+
+       for (i = 0; i < c->Count; i++) {
+               struct rc_constant * cur = &c->Constants[i];
+               unsigned chan;
+
+               if (cur->Type != RC_CONSTANT_IMMEDIATE)
+                       continue;
+
+               for (chan = 0; chan < cur->Size; chan++) {
+                       if (cur->u.Immediate[chan] == data) {
+                               *swizzle = RC_MAKE_SWIZZLE_SMEAR(chan);
+                               return i;
+                       }
+               }
+
+               /* Remember a partially filled immediate for later use. */
+               if (cur->Size < 4)
+                       spare = i;
+       }
+
+       if (spare >= 0) {
+               struct rc_constant * slot = &c->Constants[spare];
+               unsigned chan = slot->Size++;
+
+               slot->u.Immediate[chan] = data;
+               *swizzle = RC_MAKE_SWIZZLE_SMEAR(chan);
+               return spare;
+       }
+
+       memset(&fresh, 0, sizeof(fresh));
+       fresh.Type = RC_CONSTANT_IMMEDIATE;
+       fresh.Size = 1;
+       fresh.u.Immediate[0] = data;
+       *swizzle = RC_SWIZZLE_XXXX;
+
+       return rc_constants_add(c, &fresh);
+}
+
+/**
+ * Print every immediate constant of the list to stderr, one line per
+ * constant, always showing all four components. Constants of other types
+ * are skipped.
+ */
+void rc_constants_print(struct rc_constant_list * c)
+{
+       unsigned int idx;
+
+       for (idx = 0; idx < c->Count; ++idx) {
+               const struct rc_constant * entry = &c->Constants[idx];
+
+               if (entry->Type != RC_CONSTANT_IMMEDIATE)
+                       continue;
+
+               fprintf(stderr, "CONST[%u] = "
+                       "{ %10.4f %10.4f %10.4f %10.4f }\n",
+                       idx, entry->u.Immediate[0], entry->u.Immediate[1],
+                       entry->u.Immediate[2], entry->u.Immediate[3]);
+       }
+}
diff --git a/src/gallium/drivers/r300/compiler/radeon_code.h b/src/gallium/drivers/r300/compiler/radeon_code.h
new file mode 100644 (file)
index 0000000..67e6acf
--- /dev/null
@@ -0,0 +1,306 @@
+/*
+ * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#ifndef RADEON_CODE_H
+#define RADEON_CODE_H
+
+#include <stdint.h>
+
+#define R300_PFS_MAX_ALU_INST     64
+#define R300_PFS_MAX_TEX_INST     32
+#define R300_PFS_MAX_TEX_INDIRECT 4
+#define R300_PFS_NUM_TEMP_REGS    32
+#define R300_PFS_NUM_CONST_REGS   32
+
+/* These two size the alu.inst[] and tex.inst[] arrays of
+ * struct r300_fragment_program_code below. */
+#define R400_PFS_MAX_ALU_INST     512
+#define R400_PFS_MAX_TEX_INST     512
+
+/* Sizes the inst[] array of struct r500_fragment_program_code below. */
+#define R500_PFS_MAX_INST         512
+#define R500_PFS_NUM_TEMP_REGS    128
+#define R500_PFS_NUM_CONST_REGS   256
+/* emit_flowcontrol() in r500_fragprog_emit.c rejects an IF once the current
+ * branch depth has reached this value. */
+#define R500_PFS_MAX_BRANCH_DEPTH_FULL 32
+/* NOTE(review): presumably the nesting limit without full flow control
+ * mode; the emitter currently compares against a literal 4 - confirm. */
+#define R500_PFS_MAX_BRANCH_DEPTH_PARTIAL 4
+
+
+#define STATE_R300_WINDOW_DIMENSION (STATE_INTERNAL_DRIVER+0)
+
+/** Kinds of constants, stored in rc_constant::Type. */
+enum {
+       /**
+        * External constants are constants whose meaning is unknown to this
+        * compiler. For example, a Mesa gl_program's constants are turned
+        * into external constants.
+        */
+       RC_CONSTANT_EXTERNAL = 0,
+
+       /**
+        * Constant whose float value(s) are stored directly in
+        * rc_constant::u.Immediate.
+        */
+       RC_CONSTANT_IMMEDIATE,
+
+       /**
+        * Constant referring to state that is known by this compiler,
+        * see RC_STATE_xxx, i.e. *not* arbitrary Mesa (or other) state.
+        */
+       RC_CONSTANT_STATE
+};
+
+enum {
+       RC_STATE_SHADOW_AMBIENT = 0,
+
+       RC_STATE_R300_WINDOW_DIMENSION,
+       RC_STATE_R300_TEXRECT_FACTOR,
+       RC_STATE_R300_TEXSCALE_FACTOR,
+       RC_STATE_R300_VIEWPORT_SCALE,
+       RC_STATE_R300_VIEWPORT_OFFSET
+};
+
+/**
+ * A single constant. Which member of \c u is meaningful depends on Type.
+ */
+struct rc_constant {
+       unsigned Type:2; /**< RC_CONSTANT_xxx */
+       /**
+        * Number of components in use. The rc_constants_add_* helpers set
+        * this to 4 for state and vec4 constants; scalar immediates start
+        * at 1 and grow as more scalars are packed into the same constant.
+        */
+       unsigned Size:3;
+
+       union {
+               /* NOTE(review): no code in this file reads or writes
+                * External; presumably an identifier supplied by the
+                * compiler's user - confirm. */
+               unsigned External;
+               /* Values of an RC_CONSTANT_IMMEDIATE constant. */
+               float Immediate[4];
+               /* The (state0, state1) pair passed to rc_constants_add_state(). */
+               unsigned State[2];
+       } u;
+};
+
+/**
+ * Growable array of constants, managed by the rc_constants_* functions.
+ */
+struct rc_constant_list {
+       /* Heap-allocated storage; released by rc_constants_destroy(). */
+       struct rc_constant * Constants;
+       /* Number of entries of Constants that are in use. */
+       unsigned Count;
+
+       /* Number of entries Constants has room for. */
+       unsigned _Reserved;
+};
+
+/*
+ * Constant list management. The rc_constants_add* functions return the
+ * index of the constant inside the list.
+ */
+void rc_constants_init(struct rc_constant_list * c);
+void rc_constants_copy(struct rc_constant_list * dst, struct rc_constant_list * src);
+void rc_constants_destroy(struct rc_constant_list * c);
+unsigned rc_constants_add(struct rc_constant_list * c, struct rc_constant * constant);
+/* Parameter names match the definition and the rc_constant::u.State[0]/[1]
+ * slots they are stored in. */
+unsigned rc_constants_add_state(struct rc_constant_list * c, unsigned state0, unsigned state1);
+unsigned rc_constants_add_immediate_vec4(struct rc_constant_list * c, const float * data);
+unsigned rc_constants_add_immediate_scalar(struct rc_constant_list * c, float data, unsigned * swizzle);
+void rc_constants_print(struct rc_constant_list * c);
+
+/**
+ * Compare functions.
+ *
+ * \note By design, RC_COMPARE_FUNC_xxx + GL_NEVER gives you
+ * the correct GL compare function.
+ */
+typedef enum {
+       RC_COMPARE_FUNC_NEVER = 0,
+       RC_COMPARE_FUNC_LESS,
+       RC_COMPARE_FUNC_EQUAL,
+       RC_COMPARE_FUNC_LEQUAL,
+       RC_COMPARE_FUNC_GREATER,
+       RC_COMPARE_FUNC_NOTEQUAL,
+       RC_COMPARE_FUNC_GEQUAL,
+       RC_COMPARE_FUNC_ALWAYS
+} rc_compare_func;
+
+/**
+ * Coordinate wrapping modes.
+ *
+ * These are not quite the same as their GL counterparts yet.
+ */
+typedef enum {
+       RC_WRAP_NONE = 0,
+       RC_WRAP_REPEAT,
+       RC_WRAP_MIRRORED_REPEAT,
+       RC_WRAP_MIRRORED_CLAMP
+} rc_wrap_mode;
+
+/**
+ * Stores state that influences the compilation of a fragment program.
+ */
+struct r300_fragment_program_external_state {
+       struct {
+               /**
+                * This field contains swizzle for some lowering passes
+                * (shadow comparison, unorm->snorm conversion)
+                */
+               unsigned texture_swizzle:12;
+
+               /**
+                * If the sampler is used as a shadow sampler,
+                * this field specifies the compare function.
+                *
+                * Otherwise, this field is \ref RC_COMPARE_FUNC_NEVER (aka 0).
+                * \sa rc_compare_func
+                */
+               unsigned texture_compare_func : 3;
+
+               /**
+                * No matter what the sampler type is,
+                * this field turns it into a shadow sampler.
+                */
+               unsigned compare_mode_enabled : 1;
+
+               /**
+                * If the sampler will receive non-normalized coords,
+                * this field is set. The scaling factor is given by
+                * RC_STATE_R300_TEXRECT_FACTOR.
+                */
+               unsigned non_normalized_coords : 1;
+
+               /**
+                * This field specifies wrapping modes for the sampler.
+                *
+                * If this field is \ref RC_WRAP_NONE (aka 0), no wrapping maths
+                * will be performed on the coordinates.
+                */
+               unsigned wrap_mode : 3;
+
+               /**
+                * The coords are scaled after applying the wrap mode emulation
+                * and right before texture fetch. The scaling factor is given by
+                * RC_STATE_R300_TEXSCALE_FACTOR. */
+               unsigned clamp_and_scale_before_fetch : 1;
+
+               /**
+                * Fetch RGTC1_SNORM or LATC1_SNORM as UNORM and convert UNORM -> SNORM
+                * in the shader.
+                */
+               unsigned convert_unorm_to_snorm:1;
+       } unit[16];
+
+       unsigned frag_clamp:1;
+};
+
+
+
+struct r300_fragment_program_node {
+       int tex_offset; /**< first tex instruction */
+       int tex_end; /**< last tex instruction, relative to tex_offset */
+       int alu_offset; /**< first ALU instruction */
+       int alu_end; /**< last ALU instruction, relative to alu_offset */
+       int flags;
+};
+
+/**
+ * Stores an R300 fragment program in its compiled-to-hardware form.
+ */
+struct r300_fragment_program_code {
+       struct {
+               unsigned int length; /**< total # of texture instructions used */
+               uint32_t inst[R400_PFS_MAX_TEX_INST];
+       } tex;
+
+       struct {
+               unsigned int length; /**< total # of ALU instructions used */
+               struct {
+                       uint32_t rgb_inst;
+                       uint32_t rgb_addr;
+                       uint32_t alpha_inst;
+                       uint32_t alpha_addr;
+                       uint32_t r400_ext_addr;
+               } inst[R400_PFS_MAX_ALU_INST];
+       } alu;
+
+       uint32_t config; /* US_CONFIG */
+       uint32_t pixsize; /* US_PIXSIZE */
+       uint32_t code_offset; /* US_CODE_OFFSET */
+       uint32_t r400_code_offset_ext; /* US_CODE_EXT */
+       uint32_t code_addr[4]; /* US_CODE_ADDR */
+       /*US_CODE_BANK.R390_MODE: Enables 512 instructions and 64 temporaries
+        * for r400 cards */
+       unsigned int r390_mode:1;
+};
+
+
+/**
+ * Stores an R500 fragment program in its compiled-to-hardware form.
+ * Filled in by r500BuildFragmentProgramHwCode().
+ */
+struct r500_fragment_program_code {
+       /* One entry per hardware instruction, six dwords each. */
+       struct {
+               uint32_t inst0;
+               uint32_t inst1;
+               uint32_t inst2;
+               uint32_t inst3;
+               uint32_t inst4;
+               uint32_t inst5;
+       } inst[R500_PFS_MAX_INST];
+
+       int inst_end; /* Number of instructions - 1; also, last instruction to be executed */
+
+       /* Initialized to 1 by the emitter and checked against the compiler's
+        * max_temp_regs after emission.
+        * NOTE(review): presumably the highest temporary register index in
+        * use, maintained by use_temporary() - confirm. */
+       int max_temp_idx;
+
+       /* The emitter ORs R500_FC_FULL_FC_EN into this when the program uses
+        * loops or nests branches at least four deep. */
+       uint32_t us_fc_ctrl;
+
+       /* Integer constants for flow control; the emitter only ever sets
+        * entry 0, which is shared by all loops. */
+       uint32_t int_constants[32];
+       uint32_t int_constant_count;
+};
+
+/**
+ * Compiled fragment program for either hardware generation, together with
+ * the constants it uses.
+ */
+struct rX00_fragment_program_code {
+       /* Hardware code; which member is valid depends on the target chip. */
+       union {
+               struct r300_fragment_program_code r300;
+               struct r500_fragment_program_code r500;
+       } code;
+
+       /* Set by the R500 emitter when an instruction has a depth write mask. */
+       unsigned writes_depth:1;
+
+       struct rc_constant_list constants;
+       /* NOTE(review): not touched by any code in this part of the file;
+        * ownership and length need to be confirmed against its users. */
+       unsigned *constants_remap_table;
+};
+
+
+#define R300_VS_MAX_ALU                256
+#define R300_VS_MAX_ALU_DWORDS  (R300_VS_MAX_ALU * 4)
+#define R500_VS_MAX_ALU                1024
+#define R500_VS_MAX_ALU_DWORDS  (R500_VS_MAX_ALU * 4)
+#define R300_VS_MAX_TEMPS      32
+/* This is the max for all chipsets (r300-r500) */
+#define R300_VS_MAX_FC_OPS 16
+/* The r500 maximum depth is not just for loops, but any combination of loops
+ * and subroutine jumps. */
+#define R500_VS_MAX_FC_DEPTH 8
+#define R300_VS_MAX_LOOP_DEPTH 1
+
+#define VSF_MAX_INPUTS 32
+#define VSF_MAX_OUTPUTS 32
+
+struct r300_vertex_program_code {
+       int length;
+       union {
+               uint32_t d[R500_VS_MAX_ALU_DWORDS];
+               float f[R500_VS_MAX_ALU_DWORDS];
+       } body;
+
+       int pos_end;
+       int num_temporaries;    /* Number of temp vars used by program */
+       int inputs[VSF_MAX_INPUTS];
+       int outputs[VSF_MAX_OUTPUTS];
+
+       struct rc_constant_list constants;
+       unsigned *constants_remap_table;
+
+       uint32_t InputsRead;
+       uint32_t OutputsWritten;
+
+       unsigned int num_fc_ops;
+       uint32_t fc_ops;
+       union {
+               uint32_t r300[R300_VS_MAX_FC_OPS];
+               struct {
+                       uint32_t lw;
+                       uint32_t uw;
+               } r500[R300_VS_MAX_FC_OPS];
+       } fc_op_addrs;
+       int32_t fc_loop_index[R300_VS_MAX_FC_OPS];
+};
+
+#endif /* RADEON_CODE_H */
+
diff --git a/src/gallium/drivers/r300/compiler/radeon_compiler.c b/src/gallium/drivers/r300/compiler/radeon_compiler.c
new file mode 100644 (file)
index 0000000..b793672
--- /dev/null
@@ -0,0 +1,489 @@
+/*
+ * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "radeon_compiler.h"
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "radeon_dataflow.h"
+#include "radeon_program.h"
+#include "radeon_program_pair.h"
+#include "radeon_compiler_util.h"
+
+
+void rc_init(struct radeon_compiler * c)
+{
+       memset(c, 0, sizeof(*c)); /* start from an all-zero compiler state */
+
+       memory_pool_init(&c->Pool);
+       c->Program.Instructions.Prev = &c->Program.Instructions; /* the list head links to itself ... */
+       c->Program.Instructions.Next = &c->Program.Instructions; /* ... so the instruction list starts out empty */
+       c->Program.Instructions.U.I.Opcode = RC_OPCODE_ILLEGAL_OPCODE;
+}
+
+void rc_destroy(struct radeon_compiler * c)
+{
+       rc_constants_destroy(&c->Program.Constants);
+       memory_pool_destroy(&c->Pool);
+       free(c->ErrorMsg); /* heap string stored by rc_error(); free(NULL) is a no-op when no error was recorded */
+}
+
+void rc_debug(struct radeon_compiler * c, const char * fmt, ...)
+{
+       /* Debug output goes to stderr only while the RC_DBG_LOG bit is set. */
+       if (c->Debug & RC_DBG_LOG) {
+               va_list args;
+
+               va_start(args, fmt);
+               vfprintf(stderr, fmt, args);
+               va_end(args);
+       }
+}
+
+/* Flag a compile error on c; only the first message is kept in c->ErrorMsg. */
+void rc_error(struct radeon_compiler * c, const char * fmt, ...)
+{
+       va_list ap;
+
+       c->Error = 1;
+       if (!c->ErrorMsg) {
+               /* Only remember the first error */
+               char buf[1024];
+               int written;
+
+               va_start(ap, fmt);
+               written = vsnprintf(buf, sizeof(buf), fmt, ap);
+               va_end(ap);
+
+               /* written < 0 is a formatting failure: leave ErrorMsg unset. */
+               if (written >= 0 && (size_t)written < sizeof(buf)) {
+                       c->ErrorMsg = strdup(buf);
+               } else if (written >= 0 && (c->ErrorMsg = malloc((size_t)written + 1)) != NULL) {
+                       /* buf was too small: format again into an exact-size buffer */
+                       va_start(ap, fmt);
+                       vsnprintf(c->ErrorMsg, (size_t)written + 1, fmt, ap);
+                       va_end(ap);
+               }
+       }
+
+       if (c->Debug & RC_DBG_LOG) {
+               fprintf(stderr, "r300compiler error: ");
+
+               va_start(ap, fmt);
+               vfprintf(stderr, fmt, ap);
+               va_end(ap);
+       }
+}
+
+int rc_if_fail_helper(struct radeon_compiler * c, const char * file, int line, const char * assertion)
+{
+       rc_error(c, "ICE at %s:%i: assertion failed: %s\n", file, line, assertion);
+       return 1; /* always nonzero, so rc_assert() evaluates to true when its condition fails */
+}
+
+/**
+ * Recompute c->Program.InputsRead and c->Program.OutputsWritten
+ * based on which inputs and outputs are actually referenced
+ * in program instructions. Bit N is set for register index N.
+ */
+void rc_calculate_inputs_outputs(struct radeon_compiler * c)
+{
+       struct rc_instruction *inst;
+
+       c->Program.InputsRead = 0;
+       c->Program.OutputsWritten = 0;
+
+       for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next)
+       {
+               const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+               int i;
+
+               for (i = 0; i < opcode->NumSrcRegs; ++i) {
+                       if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT) /* unsigned shift: index 31 must not overflow int */
+                               c->Program.InputsRead |= 1u << inst->U.I.SrcReg[i].Index;
+               }
+
+               if (opcode->HasDstReg) {
+                       if (inst->U.I.DstReg.File == RC_FILE_OUTPUT)
+                               c->Program.OutputsWritten |= 1u << inst->U.I.DstReg.Index;
+               }
+       }
+}
+
+/**
+ * Rewrite the program such that every instruction that sources the given
+ * input register will source new_input instead.
+ */
+void rc_move_input(struct radeon_compiler * c, unsigned input, struct rc_src_register new_input)
+{
+       struct rc_instruction * inst;
+
+       c->Program.InputsRead &= ~(1u << input); /* unsigned shift: bit 31 must not overflow int */
+
+       for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) {
+               const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+               unsigned i;
+
+               for(i = 0; i < opcode->NumSrcRegs; ++i) {
+                       if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT && inst->U.I.SrcReg[i].Index == input) {
+                               inst->U.I.SrcReg[i].File = new_input.File;
+                               inst->U.I.SrcReg[i].Index = new_input.Index;
+                               inst->U.I.SrcReg[i].Swizzle = combine_swizzles(new_input.Swizzle, inst->U.I.SrcReg[i].Swizzle);
+                               if (!inst->U.I.SrcReg[i].Abs) { /* new_input's sign modifiers are merged only when the source has no Abs of its own */
+                                       inst->U.I.SrcReg[i].Negate ^= new_input.Negate;
+                                       inst->U.I.SrcReg[i].Abs = new_input.Abs;
+                               }
+
+                               c->Program.InputsRead |= 1u << new_input.Index;
+                       }
+               }
+       }
+}
+
+
+/**
+ * Rewrite the program such that everything that writes into the given
+ * output register will instead write to new_output. The new_output
+ * writemask is honoured (ANDed into each rewritten destination).
+ */
+void rc_move_output(struct radeon_compiler * c, unsigned output, unsigned new_output, unsigned writemask)
+{
+       struct rc_instruction * inst;
+
+       c->Program.OutputsWritten &= ~(1u << output); /* unsigned shift: bit 31 must not overflow int */
+
+       for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) {
+               const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+
+               if (opcode->HasDstReg) {
+                       if (inst->U.I.DstReg.File == RC_FILE_OUTPUT && inst->U.I.DstReg.Index == output) {
+                               inst->U.I.DstReg.Index = new_output;
+                               inst->U.I.DstReg.WriteMask &= writemask;
+
+                               c->Program.OutputsWritten |= 1u << new_output;
+                       }
+               }
+       }
+}
+
+
+/**
+ * Rewrite the program such that a given output is duplicated: writes go to a
+ * temporary that two appended MOVs copy to output and dup_output. */
+void rc_copy_output(struct radeon_compiler * c, unsigned output, unsigned dup_output)
+{
+       unsigned tempreg = rc_find_free_temporary(c);
+       struct rc_instruction * inst;
+
+       for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) {
+               const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+
+               if (opcode->HasDstReg) {
+                       if (inst->U.I.DstReg.File == RC_FILE_OUTPUT && inst->U.I.DstReg.Index == output) {
+                               inst->U.I.DstReg.File = RC_FILE_TEMPORARY; /* redirect the write to the temporary */
+                               inst->U.I.DstReg.Index = tempreg;
+                       }
+               }
+       }
+
+       inst = rc_insert_new_instruction(c, c->Program.Instructions.Prev); /* inserted after the current last instruction */
+       inst->U.I.Opcode = RC_OPCODE_MOV;
+       inst->U.I.DstReg.File = RC_FILE_OUTPUT;
+       inst->U.I.DstReg.Index = output;
+
+       inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+       inst->U.I.SrcReg[0].Index = tempreg;
+       inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;
+
+       inst = rc_insert_new_instruction(c, c->Program.Instructions.Prev);
+       inst->U.I.Opcode = RC_OPCODE_MOV;
+       inst->U.I.DstReg.File = RC_FILE_OUTPUT;
+       inst->U.I.DstReg.Index = dup_output;
+
+       inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+       inst->U.I.SrcReg[0].Index = tempreg;
+       inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;
+
+       c->Program.OutputsWritten |= 1u << dup_output; /* unsigned shift: bit 31 must not overflow int */
+}
+
+
+/**
+ * Introduce standard code fragment to deal with fragment.position: reads of
+ * wpos are replaced by a temporary computed from new_input (RCP, MUL, MAD). */
+void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsigned new_input,
+                                int full_vtransform)
+{
+       unsigned tempregi = rc_find_free_temporary(c);
+       struct rc_instruction * inst_rcp;
+       struct rc_instruction * inst_mul;
+       struct rc_instruction * inst_mad;
+       struct rc_instruction * inst;
+
+       c->Program.InputsRead &= ~(1u << wpos); /* unsigned shifts: bit 31 must not overflow int */
+       c->Program.InputsRead |= 1u << new_input;
+
+       /* perspective divide */
+       inst_rcp = rc_insert_new_instruction(c, &c->Program.Instructions);
+       inst_rcp->U.I.Opcode = RC_OPCODE_RCP;
+
+       inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY;
+       inst_rcp->U.I.DstReg.Index = tempregi;
+       inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W;
+
+       inst_rcp->U.I.SrcReg[0].File = RC_FILE_INPUT;
+       inst_rcp->U.I.SrcReg[0].Index = new_input;
+       inst_rcp->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW;
+
+       inst_mul = rc_insert_new_instruction(c, inst_rcp);
+       inst_mul->U.I.Opcode = RC_OPCODE_MUL;
+
+       inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY;
+       inst_mul->U.I.DstReg.Index = tempregi;
+       inst_mul->U.I.DstReg.WriteMask = RC_MASK_XYZ;
+
+       inst_mul->U.I.SrcReg[0].File = RC_FILE_INPUT;
+       inst_mul->U.I.SrcReg[0].Index = new_input;
+
+       inst_mul->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
+       inst_mul->U.I.SrcReg[1].Index = tempregi;
+       inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW;
+
+       /* viewport transformation */
+       inst_mad = rc_insert_new_instruction(c, inst_mul);
+       inst_mad->U.I.Opcode = RC_OPCODE_MAD;
+
+       inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY;
+       inst_mad->U.I.DstReg.Index = tempregi;
+       inst_mad->U.I.DstReg.WriteMask = RC_MASK_XYZ;
+
+       inst_mad->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+       inst_mad->U.I.SrcReg[0].Index = tempregi;
+       inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0;
+
+       inst_mad->U.I.SrcReg[1].File = RC_FILE_CONSTANT;
+       inst_mad->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZ0;
+
+       inst_mad->U.I.SrcReg[2].File = RC_FILE_CONSTANT;
+       inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_XYZ0;
+
+       if (full_vtransform) {
+               inst_mad->U.I.SrcReg[1].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_VIEWPORT_SCALE, 0);
+               inst_mad->U.I.SrcReg[2].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_VIEWPORT_OFFSET, 0);
+       } else { /* both MAD constants share the single WINDOW_DIMENSION state constant */
+               inst_mad->U.I.SrcReg[1].Index =
+               inst_mad->U.I.SrcReg[2].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_WINDOW_DIMENSION, 0);
+       }
+
+       for (inst = inst_mad->Next; inst != &c->Program.Instructions; inst = inst->Next) { /* redirect later reads of wpos */
+               const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+               unsigned i;
+
+               for(i = 0; i < opcode->NumSrcRegs; i++) {
+                       if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT &&
+                           inst->U.I.SrcReg[i].Index == wpos) {
+                               inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY;
+                               inst->U.I.SrcReg[i].Index = tempregi;
+                       }
+               }
+       }
+}
+
+
+/**
+ * The FACE input in hardware contains 1 if it's a back face, 0 otherwise.
+ * Gallium and OpenGL define it the other way around.
+ *
+ * So let's just invert FACE (1 - FACE) at the beginning of the shader and
+ * rewrite the rest of the shader to read from the newly allocated temporary.
+ */
+void rc_transform_fragment_face(struct radeon_compiler *c, unsigned face)
+{
+       unsigned tempregi = rc_find_free_temporary(c);
+       struct rc_instruction *inst_add;
+       struct rc_instruction *inst;
+
+       /* tempregi.x = 1 + (-FACE) */
+       inst_add = rc_insert_new_instruction(c, &c->Program.Instructions);
+       inst_add->U.I.Opcode = RC_OPCODE_ADD;
+
+       inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY;
+       inst_add->U.I.DstReg.Index = tempregi;
+       inst_add->U.I.DstReg.WriteMask = RC_MASK_X;
+
+       inst_add->U.I.SrcReg[0].File = RC_FILE_NONE;
+       inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111;
+
+       inst_add->U.I.SrcReg[1].File = RC_FILE_INPUT;
+       inst_add->U.I.SrcReg[1].Index = face;
+       inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XXXX;
+       inst_add->U.I.SrcReg[1].Negate = RC_MASK_XYZW;
+
+       for (inst = inst_add->Next; inst != &c->Program.Instructions; inst = inst->Next) { /* the ADD itself keeps reading FACE */
+               const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+               unsigned i;
+
+               for(i = 0; i < opcode->NumSrcRegs; i++) {
+                       if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT &&
+                           inst->U.I.SrcReg[i].Index == face) {
+                               inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY;
+                               inst->U.I.SrcReg[i].Index = tempregi;
+                       }
+               }
+       }
+}
+
+static void reg_count_callback(void * userdata, struct rc_instruction * inst,
+               rc_register_file file, unsigned int index, unsigned int mask)
+{
+       int *highest = userdata; /* running maximum of the temporary indices seen */
+       if (file == RC_FILE_TEMPORARY && (int)index > *highest)
+               *highest = index;
+}
+
+void rc_get_stats(struct radeon_compiler *c, struct rc_program_stats *s)
+{
+       int max_reg = -1; /* -1 so that num_temp_regs ends up 0 when no temporary is seen */
+       struct rc_instruction * tmp;
+       memset(s, 0, sizeof(*s));
+
+       for(tmp = c->Program.Instructions.Next; tmp != &c->Program.Instructions;
+                                                       tmp = tmp->Next){
+               const struct rc_opcode_info * info;
+               rc_for_all_reads_mask(tmp, reg_count_callback, &max_reg); /* callback keeps the highest temporary index */
+               if (tmp->Type == RC_INSTRUCTION_NORMAL) {
+                       info = rc_get_opcode_info(tmp->U.I.Opcode);
+                       if (info->Opcode == RC_OPCODE_BEGIN_TEX)
+                               continue; /* BEGIN_TEX is left out of every counter below */
+                       if (tmp->U.I.PreSub.Opcode != RC_PRESUB_NONE)
+                               s->num_presub_ops++;
+               } else { /* any other type is read through the pair (U.P) layout */
+                       if (tmp->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Used)
+                               s->num_presub_ops++;
+                       if (tmp->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Used)
+                               s->num_presub_ops++;
+                       /* Assuming alpha will never be a flow control or
+                        * a tex instruction. */
+                       if (tmp->U.P.Alpha.Opcode != RC_OPCODE_NOP)
+                               s->num_alpha_insts++;
+                       if (tmp->U.P.RGB.Opcode != RC_OPCODE_NOP)
+                               s->num_rgb_insts++;
+                       info = rc_get_opcode_info(tmp->U.P.RGB.Opcode); /* FC/TEX classification uses the RGB opcode */
+               }
+               if (info->IsFlowControl)
+                       s->num_fc_insts++;
+               if (info->HasTexture)
+                       s->num_tex_insts++;
+               s->num_insts++;
+       }
+       s->num_temp_regs = max_reg + 1; /* highest index seen + 1 */
+}
+
+static void print_stats(struct radeon_compiler * c) /* prints rc_get_stats() results for c to stderr */
+{
+       struct rc_program_stats s;
+
+       if (c->initial_num_insts <= 5) /* nothing is printed for programs that started with 5 or fewer instructions */
+               return;
+
+       rc_get_stats(c, &s);
+
+       switch (c->type) {
+       case RC_VERTEX_PROGRAM:
+               fprintf(stderr,"~~~~~~~~~ VERTEX PROGRAM ~~~~~~~~\n"
+                              "~%4u Instructions\n"
+                              "~%4u Flow Control Instructions\n"
+                              "~%4u Temporary Registers\n"
+                              "~~~~~~~~~~~~~~ END ~~~~~~~~~~~~~~\n",
+                              s.num_insts, s.num_fc_insts, s.num_temp_regs);
+               break;
+
+       case RC_FRAGMENT_PROGRAM:
+               fprintf(stderr,"~~~~~~~~ FRAGMENT PROGRAM ~~~~~~~\n"
+                              "~%4u Instructions\n"
+                              "~%4u Vector Instructions (RGB)\n"
+                              "~%4u Scalar Instructions (Alpha)\n"
+                              "~%4u Flow Control Instructions\n"
+                              "~%4u Texture Instructions\n"
+                              "~%4u Presub Operations\n"
+                              "~%4u Temporary Registers\n"
+                              "~~~~~~~~~~~~~~ END ~~~~~~~~~~~~~~\n",
+                              s.num_insts, s.num_rgb_insts, s.num_alpha_insts,
+                              s.num_fc_insts, s.num_tex_insts, s.num_presub_ops,
+                              s.num_temp_regs);
+               break;
+       default: /* no other program type is expected here */
+               assert(0);
+       }
+}
+
+static const char *shader_name[RC_NUM_PROGRAM_TYPES] = { /* indexed by c->type (enum rc_program_type) */
+       "Vertex Program",
+       "Fragment Program"
+};
+
+void rc_run_compiler_passes(struct radeon_compiler *c, struct radeon_compiler_pass *list)
+{
+       /* Walk the name-terminated list; stop at the first pass that flags an error. */
+       for (struct radeon_compiler_pass *pass = list; pass->name; pass++) {
+               if (!pass->predicate)
+                       continue;
+
+               pass->run(c, pass->user);
+               if (c->Error)
+                       return;
+               if ((c->Debug & RC_DBG_LOG) && pass->dump) {
+                       fprintf(stderr, "%s: after '%s'\n", shader_name[c->type], pass->name);
+                       rc_print_program(&c->Program);
+               }
+       }
+}
+
+/* Executes the passes in 'list'; dumps the program first (RC_DBG_LOG) and prints stats last (RC_DBG_STATS). */
+void rc_run_compiler(struct radeon_compiler *c, struct radeon_compiler_pass *list)
+{
+       struct rc_program_stats s;
+
+       rc_get_stats(c, &s);
+       c->initial_num_insts = s.num_insts; /* instruction count before any pass has run */
+
+       if (c->Debug & RC_DBG_LOG) {
+               fprintf(stderr, "%s: before compilation\n", shader_name[c->type]);
+               rc_print_program(&c->Program);
+       }
+
+       rc_run_compiler_passes(c, list);
+
+       if (c->Debug & RC_DBG_STATS)
+               print_stats(c);
+}
+
+void rc_validate_final_shader(struct radeon_compiler *c, void *user)
+{
+       /* Flag an error if the program needs more constants than max_constants; 'user' is unused. */
+       if (c->Program.Constants.Count > c->max_constants) {
+               rc_error(c, "Too many constants. Max: %u, Got: %u\n",
+                        c->max_constants, c->Program.Constants.Count);
+       }
+}
diff --git a/src/gallium/drivers/r300/compiler/radeon_compiler.h b/src/gallium/drivers/r300/compiler/radeon_compiler.h
new file mode 100644 (file)
index 0000000..74594af
--- /dev/null
@@ -0,0 +1,171 @@
+/*
+ * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#ifndef RADEON_COMPILER_H
+#define RADEON_COMPILER_H
+
+#include "main/compiler.h"
+
+#include "memory_pool.h"
+#include "radeon_code.h"
+#include "radeon_program.h"
+#include "radeon_emulate_loops.h"
+
+#define RC_DBG_LOG        (1 << 0) /* enables rc_debug() output, error echo and program dumps on stderr */
+#define RC_DBG_STATS      (1 << 1) /* enables the statistics printed at the end of rc_run_compiler() */
+
+struct rc_swizzle_caps;
+
+enum rc_program_type {
+       RC_VERTEX_PROGRAM,
+       RC_FRAGMENT_PROGRAM,
+       RC_NUM_PROGRAM_TYPES /* count of the types above; used as an array size, not as a type */
+};
+
+struct radeon_compiler {
+       struct memory_pool Pool;
+       struct rc_program Program;
+       enum rc_program_type type;
+       unsigned Debug:2; /* RC_DBG_* flag bits */
+       unsigned Error:1; /* set to 1 by rc_error() */
+       char * ErrorMsg; /* first message recorded by rc_error(); freed by rc_destroy() */
+
+       /* Hardware specification. */
+       unsigned is_r400:1;
+       unsigned is_r500:1;
+       unsigned has_half_swizzles:1;
+       unsigned has_presub:1;
+       unsigned disable_optimizations:1;
+       unsigned max_temp_regs;
+       unsigned max_constants; /* limit checked by rc_validate_final_shader() */
+       int max_alu_insts;
+       unsigned max_tex_insts;
+
+       /* Whether to remove unused constants and empty holes in constant space. */
+       unsigned remove_unused_constants:1;
+
+       /**
+        * Variables used internally, not to be touched by callers
+        * of the compiler
+        */
+       /*@{*/
+       struct rc_swizzle_caps * SwizzleCaps;
+       /*@}*/
+
+       struct emulate_loop_state loop_state;
+
+       unsigned initial_num_insts; /* Number of instructions at start. */
+};
+
+void rc_init(struct radeon_compiler * c);
+void rc_destroy(struct radeon_compiler * c);
+
+void rc_debug(struct radeon_compiler * c, const char * fmt, ...);
+void rc_error(struct radeon_compiler * c, const char * fmt, ...);
+
+int rc_if_fail_helper(struct radeon_compiler * c, const char * file, int line, const char * assertion);
+
+/**
+ * This macro acts like an if-statement that can be used to implement
+ * non-aborting assertions in the compiler.
+ *
+ * It checks whether \p cond is true. If not, an internal compiler error is
+ * flagged via rc_if_fail_helper() and the macro evaluates to nonzero, so the
+ * if-clause is run.
+ * A typical use-case would be:
+ *
+ *  if (rc_assert(c, condition-that-must-be-true))
+ *     return;
+ */
+#define rc_assert(c, cond) \
+       (!(cond) && rc_if_fail_helper(c, __FILE__, __LINE__, #cond))
+
+void rc_calculate_inputs_outputs(struct radeon_compiler * c);
+
+void rc_move_input(struct radeon_compiler * c, unsigned input, struct rc_src_register new_input);
+void rc_move_output(struct radeon_compiler * c, unsigned output, unsigned new_output, unsigned writemask);
+void rc_copy_output(struct radeon_compiler * c, unsigned output, unsigned dup_output);
+void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsigned new_input,
+                                int full_vtransform);
+void rc_transform_fragment_face(struct radeon_compiler *c, unsigned face);
+
+struct r300_fragment_program_compiler {
+       struct radeon_compiler Base; /* generic compiler state, also embedded by the vertex compiler */
+       struct rX00_fragment_program_code *code; /* NOTE(review): presumably receives the compiled code -- confirm */
+       /* Optional transformations and features. */
+       struct r300_fragment_program_external_state state;
+       unsigned enable_shadow_ambient;
+       /* Register corresponding to the depthbuffer. */
+       unsigned OutputDepth;
+       /* Registers corresponding to the four colorbuffers. */
+       unsigned OutputColor[4];
+
+       void * UserData;
+       void (*AllocateHwInputs)(
+               struct r300_fragment_program_compiler * c,
+               void (*allocate)(void * data, unsigned input, unsigned hwreg),
+               void * mydata); /* hook taking an 'allocate' callback and the 'mydata' pointer that goes with it */
+};
+
+void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c);
+
+struct r300_vertex_program_compiler {
+       struct radeon_compiler Base; /* generic compiler state, also embedded by the fragment compiler */
+       struct r300_vertex_program_code *code; /* NOTE(review): presumably receives the compiled code -- confirm */
+       uint32_t RequiredOutputs; /* NOTE(review): presumably a bitmask of output registers -- confirm */
+
+       void * UserData;
+       void (*SetHwInputOutput)(struct r300_vertex_program_compiler * c); /* hook taking this compiler */
+
+       int PredicateIndex;
+       unsigned int PredicateMask;
+};
+
+void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* c);
+void r300_vertex_program_dump(struct radeon_compiler *compiler, void *user);
+
+struct radeon_compiler_pass {
+       const char *name;       /* Name of the pass; a NULL name ends a pass list. */
+       int dump;               /* Dump the program after the pass if RC_DBG_LOG is set? */
+       int predicate;          /* Run this pass? */
+       void (*run)(struct radeon_compiler *c, void *user); /* The main entrypoint. */
+       void *user;             /* Optional parameter which is passed to the run function. */
+};
+
+struct rc_program_stats { /* filled in by rc_get_stats() */
+       unsigned num_insts; /* all instructions except BEGIN_TEX */
+       unsigned num_fc_insts; /* opcodes flagged IsFlowControl */
+       unsigned num_tex_insts; /* opcodes flagged HasTexture */
+       unsigned num_rgb_insts; /* pair instructions whose RGB opcode is not NOP */
+       unsigned num_alpha_insts; /* pair instructions whose Alpha opcode is not NOP */
+       unsigned num_presub_ops;
+       unsigned num_temp_regs; /* highest temporary index seen + 1 */
+};
+
+void rc_get_stats(struct radeon_compiler *c, struct rc_program_stats *s);
+
+/* Executes a list of compiler passes given in the parameter 'list'. */
+void rc_run_compiler_passes(struct radeon_compiler *c, struct radeon_compiler_pass *list);
+void rc_run_compiler(struct radeon_compiler *c, struct radeon_compiler_pass *list);
+void rc_validate_final_shader(struct radeon_compiler *c, void *user);
+
+#endif /* RADEON_COMPILER_H */
diff --git a/src/gallium/drivers/r300/compiler/radeon_compiler_util.c b/src/gallium/drivers/r300/compiler/radeon_compiler_util.c
new file mode 100644 (file)
index 0000000..2742721
--- /dev/null
@@ -0,0 +1,701 @@
+/*
+ * Copyright 2010 Tom Stellard <tstellar@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/**
+ * \file
+ */
+
+#include "radeon_compiler_util.h"
+
+#include "radeon_compiler.h"
+#include "radeon_dataflow.h"
+/** Return the OR of (1 << selector) over the four selectors of swz,
+ * limited to the bits of RC_MASK_XYZW. */
+unsigned int rc_swizzle_to_writemask(unsigned int swz)
+{
+       unsigned int mask = 0;
+       unsigned int i;
+
+       for(i = 0; i < 4; i++) {
+               mask |= 1 << GET_SWZ(swz, i);
+       }
+       mask &= RC_MASK_XYZW; /* drop bits produced by selectors outside the mask */
+
+       return mask;
+}
+
+rc_swizzle get_swz(unsigned int swz, rc_swizzle idx)
+{
+       if (idx & 0x4) /* selectors with bit 2 set are returned unchanged, not looked up in swz */
+               return idx;
+       return GET_SWZ(swz, idx);
+}
+
+/**
+ * The purpose of this function is to standardize the number of channels used
+ * by swizzles.  All swizzles regardless of what instruction they are a part of
+ * should have 4 channels initialized with values.
+ * @param channels The number of channels in initial_value that have a
+ * meaningful value.
+ * @return An initialized swizzle that has all of the unused channels set to
+ * RC_SWIZZLE_UNUSED.
+ */
+unsigned int rc_init_swizzle(unsigned int initial_value, unsigned int channels)
+{
+       unsigned int i;
+       for (i = channels; i < 4; i++) { /* channels below 'channels' keep their value */
+               SET_SWZ(initial_value, i, RC_SWIZZLE_UNUSED);
+       }
+       return initial_value;
+}
+
+/* Resolve four selectors against src and pack the results 3 bits apart. */
+unsigned int combine_swizzles4(unsigned int src,
+               rc_swizzle swz_x, rc_swizzle swz_y, rc_swizzle swz_z, rc_swizzle swz_w)
+{
+       const unsigned int x = get_swz(src, swz_x);
+       const unsigned int y = get_swz(src, swz_y);
+       const unsigned int z = get_swz(src, swz_z);
+       const unsigned int w = get_swz(src, swz_w);
+
+       /* X occupies bits 0-2, Y bits 3-5, Z bits 6-8, W bits 9-11. */
+       return x | (y << 3) | (z << 6) | (w << 9);
+}
+
+unsigned int combine_swizzles(unsigned int src, unsigned int swz)
+{
+       /* Split swz into its four selectors and let combine_swizzles4()
+        * look each one up in src and pack the result. */
+       const unsigned int sel_x = GET_SWZ(swz, RC_SWIZZLE_X);
+       const unsigned int sel_y = GET_SWZ(swz, RC_SWIZZLE_Y);
+       const unsigned int sel_z = GET_SWZ(swz, RC_SWIZZLE_Z);
+       const unsigned int sel_w = GET_SWZ(swz, RC_SWIZZLE_W);
+
+       return combine_swizzles4(src, sel_x, sel_y, sel_z, sel_w);
+}
+
+/**
+ * @param mask Must be one of RC_MASK_X, RC_MASK_Y, RC_MASK_Z, or RC_MASK_W
+ * @return The matching swizzle, or RC_SWIZZLE_UNUSED for any other mask. */
+rc_swizzle rc_mask_to_swizzle(unsigned int mask)
+{
+       switch (mask) {
+       case RC_MASK_X: return RC_SWIZZLE_X;
+       case RC_MASK_Y: return RC_SWIZZLE_Y;
+       case RC_MASK_Z: return RC_SWIZZLE_Z;
+       case RC_MASK_W: return RC_SWIZZLE_W;
+       }
+       return RC_SWIZZLE_UNUSED; /* no bit or more than one bit set */
+}
+
+/* Reorder mask bits according to swizzle: result bit 'chan' is the mask bit selected by swizzle's channel 'chan'. */
+unsigned swizzle_mask(unsigned swizzle, unsigned mask)
+{
+       unsigned ret = 0;
+       for (unsigned chan = 0; chan < 4; ++chan) {
+               unsigned swz = GET_SWZ(swizzle, chan);
+               if (swz < 4) /* selectors of 4 and above leave the result bit cleared */
+                       ret |= GET_BIT(mask, swz) << chan;
+       }
+       return ret;
+}
+
+static unsigned int srcs_need_rewrite(const struct rc_opcode_info * info)
+{
+       if (info->HasTexture) { /* texture opcodes: sources are left alone */
+               return 0;
+       }
+       switch (info->Opcode) {
+               case RC_OPCODE_DP2:
+               case RC_OPCODE_DP3:
+               case RC_OPCODE_DP4:
+               case RC_OPCODE_DDX:
+               case RC_OPCODE_DDY:
+                       return 0; /* NOTE(review): presumably because these sources do not follow the writemask -- confirm */
+               default:
+                       return 1; /* every other opcode gets its source swizzles rewritten */
+       }
+}
+
+/**
+ * @return A swizzle that results from converting old_swizzle using
+ * conversion_swizzle
+ */
+unsigned int rc_adjust_channels(
+       unsigned int old_swizzle,
+       unsigned int conversion_swizzle)
+{
+       unsigned int i;
+       unsigned int new_swizzle = rc_init_swizzle(RC_SWIZZLE_UNUSED, 0); /* all four channels start as UNUSED */
+       for (i = 0; i < 4; i++) {
+               unsigned int new_chan = get_swz(conversion_swizzle, i);
+               if (new_chan == RC_SWIZZLE_UNUSED) { /* channel i has no destination */
+                       continue;
+               }
+               SET_SWZ(new_swizzle, new_chan, GET_SWZ(old_swizzle, i)); /* old channel i moves to channel new_chan */
+       }
+       return new_swizzle;
+}
+
+static unsigned int rewrite_writemask(
+       unsigned int old_mask,
+       unsigned int conversion_swizzle)
+{
+       unsigned int result = 0;
+       unsigned int chan;
+
+       /* Move every enabled bit to the channel its conversion selector names;
+        * bits whose selector is RC_SWIZZLE_UNUSED are dropped. */
+       for (chan = 0; chan < 4; chan++) {
+               const unsigned int target = GET_SWZ(conversion_swizzle, chan);
+               if (GET_BIT(old_mask, chan) && target != RC_SWIZZLE_UNUSED)
+                       result |= (1 << target);
+       }
+
+       return result;
+}
+
+/**
+ * This function rewrites the writemask of sub and adjusts the swizzles
+ * of all its source registers based on the conversion_swizzle.
+ * conversion_swizzle represents a mapping of the old writemask to the
+ * new writemask.  For a detailed description of how conversion swizzles
+ * work see rc_rewrite_swizzle().
+ */
+void rc_pair_rewrite_writemask(
+       struct rc_pair_sub_instruction * sub,
+       unsigned int conversion_swizzle)
+{
+       const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode);
+       unsigned int i;
+
+       sub->WriteMask = rewrite_writemask(sub->WriteMask, conversion_swizzle);
+
+       if (!srcs_need_rewrite(info)) { /* for these opcodes only the writemask changes */
+               return ;
+       }
+
+       for (i = 0; i < info->NumSrcRegs; i++) {
+               sub->Arg[i].Swizzle =
+                       rc_adjust_channels(sub->Arg[i].Swizzle,
+                                               conversion_swizzle);
+       }
+}
+
+/**
+ * Source callback: userdata points to an unsigned int that is handed to
+ * rc_adjust_channels() as the conversion swizzle for src.  inst is unused.
+ */
+static void normal_rewrite_writemask_cb(
+       void * userdata,
+       struct rc_instruction * inst,
+       struct rc_src_register * src)
+{
+       const unsigned int * conversion = (unsigned int *)userdata;
+
+       src->Swizzle = rc_adjust_channels(src->Swizzle, *conversion);
+}
+
+/**
+ * This function is the same as rc_pair_rewrite_writemask() except it
+ * operates on normal instructions.
+ *
+ * It rewrites the destination writemask through conversion_swizzle, fills
+ * in TexSwizzle for texture opcodes (which must still be RC_SWIZZLE_XYZW on
+ * entry), and, when srcs_need_rewrite() says so for the opcode, adjusts the
+ * swizzle of every source that is read.
+ * @param inst The normal instruction to modify
+ * @param conversion_swizzle Mapping of old channels to new channels
+ */
+void rc_normal_rewrite_writemask(
+       struct rc_instruction * inst,
+       unsigned int conversion_swizzle)
+{
+       struct rc_sub_instruction * sub = &inst->U.I;
+       const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode);
+
+       sub->DstReg.WriteMask =
+               rewrite_writemask(sub->DstReg.WriteMask, conversion_swizzle);
+
+       if (info->HasTexture) {
+               unsigned int i;
+               assert(sub->TexSwizzle == RC_SWIZZLE_XYZW);
+               for (i = 0; i < 4; i++) {
+                       unsigned int swz = GET_SWZ(conversion_swizzle, i);
+                       /* Only entries naming a real channel move data. */
+                       if (swz > 3)
+                               continue;
+                       SET_SWZ(sub->TexSwizzle, swz, i);
+               }
+       }
+
+       if (!srcs_need_rewrite(info)) {
+               return;
+       }
+
+       /* The callback hands its userdata to rc_adjust_channels() as the
+        * conversion swizzle, so it must receive the conversion swizzle
+        * itself and not the rewritten writemask. */
+       rc_for_all_reads_src(inst, normal_rewrite_writemask_cb,
+                                               &conversion_swizzle);
+}
+
+/**
+ * This function replaces each value 'swz' in swizzle with the value of
+ * GET_SWZ(conversion_swizzle, swz).  Values greater than 3 and values whose
+ * conversion entry is RC_SWIZZLE_UNUSED are left as they are.
+ * Examples: to change all the X's in swizzle to Y, conversion_swizzle
+ * should be Y___ (0xff9).  To change all the Y's to X, it should be
+ * _X__ (0xfc7).  To swap X and Y, it should be YX__ (0xfc1).
+ * @param swizzle The swizzle to change
+ * @param conversion_swizzle Describes the conversion to perform on the swizzle
+ * @return A converted swizzle
+ */
+unsigned int rc_rewrite_swizzle(
+       unsigned int swizzle,
+       unsigned int conversion_swizzle)
+{
+       unsigned int result = swizzle;
+       unsigned int chan;
+
+       for (chan = 0; chan < 4; chan++) {
+               const unsigned int old_swz = GET_SWZ(swizzle, chan);
+               unsigned int new_swz = old_swz;
+
+               if (old_swz <= 3) {
+                       const unsigned int mapped =
+                               GET_SWZ(conversion_swizzle, old_swz);
+                       if (mapped != RC_SWIZZLE_UNUSED) {
+                               new_swz = mapped;
+                       }
+               }
+               SET_SWZ(result, chan, new_swz);
+       }
+       return result;
+}
+
+/**
+ * Left multiplication of a register with a swizzle.
+ *
+ * For each channel, a swizzle value below 4 selects the corresponding
+ * swizzle entry and negate bit of srcreg; any other value is copied into
+ * the result swizzle as is.
+ * @return A copy of srcreg with the combined Swizzle and Negate fields
+ */
+struct rc_src_register lmul_swizzle(unsigned int swizzle, struct rc_src_register srcreg)
+{
+       struct rc_src_register result = srcreg;
+       int chan;
+
+       result.Swizzle = 0;
+       result.Negate = 0;
+       for (chan = 0; chan < 4; ++chan) {
+               rc_swizzle sel = GET_SWZ(swizzle, chan);
+               if (!(sel < 4)) {
+                       result.Swizzle |= sel << (chan*3);
+                       continue;
+               }
+               result.Swizzle |= GET_SWZ(srcreg.Swizzle, sel) << (chan*3);
+               result.Negate |= GET_BIT(srcreg.Negate, sel) << chan;
+       }
+       return result;
+}
+
+/**
+ * Zeroes the whole source register and then sets its swizzle to
+ * RC_SWIZZLE_XYZW.
+ */
+void reset_srcreg(struct rc_src_register* reg)
+{
+       memset(reg, 0, sizeof(*reg));
+       reg->Swizzle = RC_SWIZZLE_XYZW;
+}
+
+/**
+ * @return The part of dst_mask that is also in
+ * rc_swizzle_to_writemask(src_swz) when source and destination have the
+ * same file and index, RC_MASK_NONE otherwise.
+ */
+unsigned int rc_src_reads_dst_mask(
+               rc_register_file src_file,
+               unsigned int src_idx,
+               unsigned int src_swz,
+               rc_register_file dst_file,
+               unsigned int dst_idx,
+               unsigned int dst_mask)
+{
+       if (src_file == dst_file && src_idx == dst_idx) {
+               return dst_mask & rc_swizzle_to_writemask(src_swz);
+       }
+       return RC_MASK_NONE;
+}
+
+/**
+ * Looks at all four channels of swizzle: a W entry adds RC_SOURCE_ALPHA,
+ * an X, Y or Z entry adds RC_SOURCE_RGB, anything else adds nothing.
+ * @return A bit mask specifying whether this swizzle will select from an RGB
+ * source, an Alpha source, or both.
+ */
+unsigned int rc_source_type_swz(unsigned int swizzle)
+{
+       unsigned int type = RC_SOURCE_NONE;
+       unsigned int chan;
+
+       for (chan = 0; chan < 4; chan++) {
+               const unsigned int sel = GET_SWZ(swizzle, chan);
+
+               if (sel == RC_SWIZZLE_W) {
+                       type |= RC_SOURCE_ALPHA;
+                       continue;
+               }
+               if (sel == RC_SWIZZLE_X || sel == RC_SWIZZLE_Y
+                   || sel == RC_SWIZZLE_Z) {
+                       type |= RC_SOURCE_RGB;
+               }
+       }
+       return type;
+}
+
+/**
+ * @return RC_SOURCE_RGB if mask has any bit of RC_MASK_XYZ set, or'd with
+ * RC_SOURCE_ALPHA if it has RC_MASK_W set; RC_SOURCE_NONE if neither.
+ */
+unsigned int rc_source_type_mask(unsigned int mask)
+{
+       unsigned int type = RC_SOURCE_NONE;
+
+       if (mask & RC_MASK_W) {
+               type |= RC_SOURCE_ALPHA;
+       }
+       if (mask & RC_MASK_XYZ) {
+               type |= RC_SOURCE_RGB;
+       }
+
+       return type;
+}
+
+/* One source select (register file + index) recorded while checking whether
+ * an instruction can use a presubtract operation. */
+struct src_select {
+       rc_register_file File;
+       int Index;
+       /* Value of rc_source_type_swz() for the swizzle reading this source. */
+       unsigned int SrcType;
+};
+
+/* Scratch state shared between rc_inst_can_use_presub() and its callback. */
+struct can_use_presub_data {
+       /* Source selects collected so far. */
+       struct src_select Selects[5];
+       /* Number of valid entries in Selects. */
+       unsigned int SelectCount;
+       /* Source register that is left out of Selects (skipped once). */
+       const struct rc_src_register * ReplaceReg;
+       /* Set to 1 once ReplaceReg has been skipped. */
+       unsigned int ReplaceRemoved;
+};
+
+/**
+ * Appends a source select to data->Selects and bumps data->SelectCount.
+ * No bounds check is performed here.
+ */
+static void can_use_presub_data_add_select(
+       struct can_use_presub_data * data,
+       rc_register_file file,
+       unsigned int index,
+       unsigned int src_type)
+{
+       struct src_select * slot = &data->Selects[data->SelectCount];
+
+       data->SelectCount++;
+       slot->File = file;
+       slot->Index = index;
+       slot->SrcType = src_type;
+}
+
+/**
+ * This callback records a source select for every source of inst, except
+ * for the first occurrence of can_use_presub_data->ReplaceReg and for
+ * sources whose file is RC_FILE_NONE.
+ */
+static void can_use_presub_read_cb(
+       void * userdata,
+       struct rc_instruction * inst,
+       struct rc_src_register * src)
+{
+       struct can_use_presub_data * data = userdata;
+
+       /* The register being replaced is dropped exactly once. */
+       if (src == data->ReplaceReg && !data->ReplaceRemoved) {
+               data->ReplaceRemoved = 1;
+               return;
+       }
+
+       if (src->File != RC_FILE_NONE) {
+               can_use_presub_data_add_select(data, src->File, src->Index,
+                                       rc_source_type_swz(src->Swizzle));
+       }
+}
+
+/**
+ * Checks whether inst could read the result of the presubtract operation
+ * presub_op in place of replace_reg.
+ *
+ * The check counts the RGB and Alpha source selects needed by the remaining
+ * sources of inst plus the presubtract sources, and fails if either count
+ * exceeds 3.  Texture instructions and instructions that already have a
+ * presubtract operation are rejected.  presub_writemask is not used.
+ * @return 1 if the presubtract operation can be used, 0 otherwise
+ */
+unsigned int rc_inst_can_use_presub(
+       struct rc_instruction * inst,
+       rc_presubtract_op presub_op,
+       unsigned int presub_writemask,
+       const struct rc_src_register * replace_reg,
+       const struct rc_src_register * presub_src0,
+       const struct rc_src_register * presub_src1)
+{
+       const struct rc_opcode_info * info =
+                                       rc_get_opcode_info(inst->U.I.Opcode);
+       struct can_use_presub_data d;
+       unsigned int presub_src_count;
+       unsigned int type0;
+       unsigned int cur;
+       int rgb_selects = 0;
+       int alpha_selects = 0;
+
+       if (presub_op == RC_PRESUB_NONE) {
+               return 1;
+       }
+
+       /* Texture instructions never take a presubtract value.
+        * We also can't use more than one presubtract value in an
+        * instruction, unless the two presubtract operations
+        * are the same and read from the same registers.
+        * XXX For now we will limit instructions to only one presubtract
+        * value.*/
+       if (info->HasTexture || inst->U.I.PreSub.Opcode != RC_PRESUB_NONE) {
+               return 0;
+       }
+
+       memset(&d, 0, sizeof(d));
+       d.ReplaceReg = replace_reg;
+
+       rc_for_all_reads_src(inst, can_use_presub_read_cb, &d);
+
+       presub_src_count = rc_presubtract_src_reg_count(presub_op);
+
+       type0 = rc_source_type_swz(presub_src0->Swizzle);
+       can_use_presub_data_add_select(&d, presub_src0->File,
+                                       presub_src0->Index, type0);
+
+       if (presub_src_count > 1) {
+               const unsigned int type1 =
+                               rc_source_type_swz(presub_src1->Swizzle);
+               const unsigned int shared = type0 & type1;
+
+               can_use_presub_data_add_select(&d, presub_src1->File,
+                                       presub_src1->Index, type1);
+
+               /* Even if both of the presub sources read from the same
+                * register, we still need to use 2 different source selects
+                * for them, so we need to increment the count to compensate.
+                */
+               if (presub_src0->File == presub_src1->File
+                   && presub_src0->Index == presub_src1->Index) {
+                       if (shared & RC_SOURCE_RGB) {
+                               rgb_selects++;
+                       }
+                       if (shared & RC_SOURCE_ALPHA) {
+                               alpha_selects++;
+                       }
+               }
+       }
+
+       /* Count the number of source selects for Alpha and RGB.  If we
+        * encounter two of the same source selects then we can ignore the
+        * first one. */
+       for (cur = 0; cur < d.SelectCount; cur++) {
+               unsigned int later;
+               unsigned int remaining = d.Selects[cur].SrcType;
+
+               for (later = cur + 1; later < d.SelectCount; later++) {
+                       if (d.Selects[cur].File == d.Selects[later].File
+                           && d.Selects[cur].Index == d.Selects[later].Index) {
+                               remaining &= ~d.Selects[later].SrcType;
+                       }
+               }
+               if (remaining & RC_SOURCE_RGB) {
+                       rgb_selects++;
+               }
+               if (remaining & RC_SOURCE_ALPHA) {
+                       alpha_selects++;
+               }
+       }
+
+       if (rgb_selects <= 3 && alpha_selects <= 3) {
+               return 1;
+       }
+
+       return 0;
+}
+
+/* Accumulator used by max_callback() on behalf of rc_get_max_index(). */
+struct max_data {
+       /* Largest index seen so far; only meaningful once HasFileType is set. */
+       unsigned int Max;
+       /* Nonzero once a register of File has been seen. */
+       unsigned int HasFileType;
+       /* The register file being searched for. */
+       rc_register_file File;
+};
+
+/**
+ * Read/write callback that tracks the largest index seen for the register
+ * file stored in userdata (a struct max_data).  inst and mask are unused.
+ */
+static void max_callback(
+       void * userdata,
+       struct rc_instruction * inst,
+       rc_register_file file,
+       unsigned int index,
+       unsigned int mask)
+{
+       struct max_data * data = (struct max_data*)userdata;
+
+       if (file != data->File) {
+               return;
+       }
+       /* Keep the current maximum unless this is the first hit or a
+        * strictly larger index. */
+       if (data->HasFileType && index <= data->Max) {
+               return;
+       }
+       data->Max = index;
+       data->HasFileType = 1;
+}
+
+/**
+ * Walks every instruction of the program and looks at all register reads
+ * and writes.
+ * @return The maximum index of the specified register file used by the
+ * program, or -1 if the program never uses that register file.
+ */
+int rc_get_max_index(
+       struct radeon_compiler * c,
+       rc_register_file file)
+{
+       struct max_data data;
+       struct rc_instruction * inst;
+
+       data.File = file;
+       data.HasFileType = 0;
+       data.Max = 0;
+
+       inst = c->Program.Instructions.Next;
+       while (inst != &c->Program.Instructions) {
+               rc_for_all_reads_mask(inst, max_callback, &data);
+               rc_for_all_writes_mask(inst, max_callback, &data);
+               inst = inst->Next;
+       }
+
+       if (data.HasFileType) {
+               return data.Max;
+       }
+       return -1;
+}
+
+/**
+ * @return The union of rc_swizzle_to_writemask() over all arguments of sub
+ * that use the given source and whose rc_source_type_swz() equals src_type.
+ */
+static unsigned int get_source_readmask(
+       struct rc_pair_sub_instruction * sub,
+       unsigned int source,
+       unsigned int src_type)
+{
+       const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode);
+       unsigned int mask = 0;
+       unsigned int arg;
+
+       for (arg = 0; arg < info->NumSrcRegs; arg++) {
+               if (sub->Arg[arg].Source == source
+                   && rc_source_type_swz(sub->Arg[arg].Swizzle) == src_type) {
+                       mask |= rc_swizzle_to_writemask(sub->Arg[arg].Swizzle);
+               }
+       }
+       return mask;
+}
+
+/**
+ * This function attempts to remove a source from a pair instruction.
+ * The removal is refused when the arguments that currently use the source
+ * read a component that is not part of new_readmask.
+ * @param inst
+ * @param src_type RC_SOURCE_RGB, RC_SOURCE_ALPHA, or both bitwise or'd
+ * @param source The index of the source to remove
+ * @param new_readmask A mask representing the components that are read by
+ * the source that is intended to replace the one you are removing.  If you
+ * want to remove a source only and not replace it, this parameter should be
+ * zero.
+ * @return 1 if the source was successfully removed, 0 if it was not
+ */
+unsigned int rc_pair_remove_src(
+       struct rc_instruction * inst,
+       unsigned int src_type,
+       unsigned int source,
+       unsigned int new_readmask)
+{
+       unsigned int used_mask;
+
+       used_mask = get_source_readmask(&inst->U.P.RGB, source, src_type);
+       used_mask |= get_source_readmask(&inst->U.P.Alpha, source, src_type);
+
+       /* Some component in use would not be covered by the replacement. */
+       if (used_mask & ~new_readmask) {
+               return 0;
+       }
+
+       if (src_type & RC_SOURCE_RGB) {
+               memset(&inst->U.P.RGB.Src[source], 0,
+                       sizeof(inst->U.P.RGB.Src[source]));
+       }
+
+       if (src_type & RC_SOURCE_ALPHA) {
+               memset(&inst->U.P.Alpha.Src[source], 0,
+                       sizeof(inst->U.P.Alpha.Src[source]));
+       }
+
+       return 1;
+}
+
+/**
+ * For pair instructions the opcode of the RGB sub-instruction is examined.
+ * @return RC_OPCODE_NOP if inst is not a flow control instruction.
+ * @return The opcode of inst if it is a flow control instruction.
+ */
+rc_opcode rc_get_flow_control_inst(struct rc_instruction * inst)
+{
+       const struct rc_opcode_info * info;
+
+       if (inst->Type != RC_INSTRUCTION_NORMAL) {
+               info = rc_get_opcode_info(inst->U.P.RGB.Opcode);
+               /* A flow control instruction shouldn't have an alpha
+                * instruction. */
+               assert(!info->IsFlowControl ||
+                               inst->U.P.Alpha.Opcode == RC_OPCODE_NOP);
+       } else {
+               info = rc_get_opcode_info(inst->U.I.Opcode);
+       }
+
+       if (!info->IsFlowControl)
+               return RC_OPCODE_NOP;
+
+       return info->Opcode;
+}
+
+/**
+ * Walks backwards from endloop, keeping track of nested loops.
+ * @return The BGNLOOP instruction that starts the loop ended by endloop,
+ * or NULL if the walk gets back to endloop without finding one.
+ */
+struct rc_instruction * rc_match_endloop(struct rc_instruction * endloop)
+{
+       unsigned int nesting = 0;
+       struct rc_instruction * cur = endloop->Prev;
+
+       while (cur != endloop) {
+               const rc_opcode op = rc_get_flow_control_inst(cur);
+
+               if (op == RC_OPCODE_ENDLOOP) {
+                       /* An inner loop ends here; skip its BGNLOOP too. */
+                       nesting++;
+               } else if (op == RC_OPCODE_BGNLOOP) {
+                       if (nesting == 0) {
+                               return cur;
+                       }
+                       nesting--;
+               }
+               cur = cur->Prev;
+       }
+       return NULL;
+}
+
+/**
+ * Walks forwards from bgnloop, keeping track of nested loops.
+ * @return The ENDLOOP instruction that ends the loop started by bgnloop,
+ * or NULL if the walk gets back to bgnloop without finding one.
+ */
+struct rc_instruction * rc_match_bgnloop(struct rc_instruction * bgnloop)
+{
+       unsigned int nesting = 0;
+       struct rc_instruction * cur = bgnloop->Next;
+
+       while (cur != bgnloop) {
+               const rc_opcode op = rc_get_flow_control_inst(cur);
+
+               if (op == RC_OPCODE_BGNLOOP) {
+                       /* An inner loop starts here; skip its ENDLOOP too. */
+                       nesting++;
+               } else if (op == RC_OPCODE_ENDLOOP) {
+                       if (nesting == 0) {
+                               return cur;
+                       }
+                       nesting--;
+               }
+               cur = cur->Next;
+       }
+       return NULL;
+}
+
+/**
+ * Pairs the set bits of old_mask with the set bits of new_mask in
+ * ascending order: the n-th channel set in old_mask is mapped to the n-th
+ * channel set in new_mask.  Channels of old_mask left over once new_mask is
+ * exhausted get no mapping.
+ * @return A conversion swizzle for converting from old_mask->new_mask
+ */
+unsigned int rc_make_conversion_swizzle(
+       unsigned int old_mask,
+       unsigned int new_mask)
+{
+       unsigned int conversion = rc_init_swizzle(RC_SWIZZLE_UNUSED, 0);
+       unsigned int old_chan;
+       unsigned int new_chan = 0;
+
+       for (old_chan = 0; old_chan < 4; old_chan++) {
+               if (!GET_BIT(old_mask, old_chan))
+                       continue;
+
+               /* Advance to the next channel that new_mask enables. */
+               while (new_chan < 4 && !GET_BIT(new_mask, new_chan)) {
+                       new_chan++;
+               }
+               if (new_chan < 4) {
+                       SET_SWZ(conversion, old_chan, new_chan);
+                       new_chan++;
+               }
+       }
+       return conversion;
+}
diff --git a/src/gallium/drivers/r300/compiler/radeon_compiler_util.h b/src/gallium/drivers/r300/compiler/radeon_compiler_util.h
new file mode 100644 (file)
index 0000000..3730aa8
--- /dev/null
@@ -0,0 +1,89 @@
+#include "radeon_program_constants.h"
+
+#ifndef RADEON_PROGRAM_UTIL_H
+#define RADEON_PROGRAM_UTIL_H
+
+#include "radeon_opcodes.h"
+
+struct radeon_compiler;
+struct rc_instruction;
+struct rc_pair_instruction;
+struct rc_pair_sub_instruction;
+struct rc_src_register;
+
+unsigned int rc_swizzle_to_writemask(unsigned int swz);
+
+rc_swizzle get_swz(unsigned int swz, rc_swizzle idx);
+
+unsigned int rc_init_swizzle(unsigned int initial_value, unsigned int channels);
+
+unsigned int combine_swizzles4(unsigned int src,
+                              rc_swizzle swz_x, rc_swizzle swz_y,
+                              rc_swizzle swz_z, rc_swizzle swz_w);
+
+unsigned int combine_swizzles(unsigned int src, unsigned int swz);
+
+rc_swizzle rc_mask_to_swizzle(unsigned int mask);
+
+unsigned swizzle_mask(unsigned swizzle, unsigned mask);
+
+/* Moves the channels of old_swizzle as described by conversion_swizzle. */
+unsigned int rc_adjust_channels(
+       unsigned int old_swizzle,
+       unsigned int conversion_swizzle);
+
+/* Rewrites the writemask (and source swizzles) of a pair sub-instruction. */
+void rc_pair_rewrite_writemask(
+       struct rc_pair_sub_instruction * sub,
+       unsigned int conversion_swizzle);
+
+/* Same as rc_pair_rewrite_writemask(), but for normal instructions. */
+void rc_normal_rewrite_writemask(
+       struct rc_instruction * inst,
+       unsigned int conversion_swizzle);
+
+/* Replaces each value 'swz' in swizzle with
+ * GET_SWZ(conversion_swizzle, swz). */
+unsigned int rc_rewrite_swizzle(
+       unsigned int swizzle,
+       unsigned int conversion_swizzle);
+
+/* Left multiplication of a register with a swizzle. */
+struct rc_src_register lmul_swizzle(unsigned int swizzle, struct rc_src_register srcreg);
+
+/* Zeroes reg and sets its swizzle to RC_SWIZZLE_XYZW. */
+void reset_srcreg(struct rc_src_register* reg);
+
+unsigned int rc_src_reads_dst_mask(
+               rc_register_file src_file,
+               unsigned int src_idx,
+               unsigned int src_swz,
+               rc_register_file dst_file,
+               unsigned int dst_idx,
+               unsigned int dst_mask);
+
+/* Returns RC_SOURCE_RGB / RC_SOURCE_ALPHA bits for a swizzle. */
+unsigned int rc_source_type_swz(unsigned int swizzle);
+
+/* Returns RC_SOURCE_RGB / RC_SOURCE_ALPHA bits for a writemask. */
+unsigned int rc_source_type_mask(unsigned int mask);
+
+/* Returns 1 if inst can use the given presubtract operation, 0 if not. */
+unsigned int rc_inst_can_use_presub(
+       struct rc_instruction * inst,
+       rc_presubtract_op presub_op,
+       unsigned int presub_writemask,
+       const struct rc_src_register * replace_reg,
+       const struct rc_src_register * presub_src0,
+       const struct rc_src_register * presub_src1);
+
+/* Returns the largest index of file used by the program, or -1 if none. */
+int rc_get_max_index(
+       struct radeon_compiler * c,
+       rc_register_file file);
+
+/* Returns 1 if the source was removed from the pair instruction, 0 if not. */
+unsigned int rc_pair_remove_src(
+       struct rc_instruction * inst,
+       unsigned int src_type,
+       unsigned int source,
+       unsigned int new_readmask);
+
+/* Returns the flow control opcode of inst, or RC_OPCODE_NOP. */
+rc_opcode rc_get_flow_control_inst(struct rc_instruction * inst);
+
+/* Return the matching BGNLOOP / ENDLOOP instruction, or NULL. */
+struct rc_instruction * rc_match_endloop(struct rc_instruction * endloop);
+struct rc_instruction * rc_match_bgnloop(struct rc_instruction * bgnloop);
+
+/* Returns a conversion swizzle for converting from old_mask->new_mask. */
+unsigned int rc_make_conversion_swizzle(
+       unsigned int old_mask,
+       unsigned int new_mask);
+
+#endif /* RADEON_PROGRAM_UTIL_H */
diff --git a/src/gallium/drivers/r300/compiler/radeon_dataflow.c b/src/gallium/drivers/r300/compiler/radeon_dataflow.c
new file mode 100644 (file)
index 0000000..a8decac
--- /dev/null
@@ -0,0 +1,892 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ * Copyright 2010 Tom Stellard <tstellar@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_dataflow.h"
+
+#include "radeon_compiler.h"
+#include "radeon_compiler_util.h"
+#include "radeon_program.h"
+
+/* Bundles a mask callback with its userdata so that it can be driven from
+ * a per-source callback (see reads_normal_callback()). */
+struct read_write_mask_data {
+       /* Opaque pointer handed back to Cb on every call. */
+       void * UserData;
+       /* Callback that receives (file, index, mask) for each read. */
+       rc_read_write_mask_fn Cb;
+};
+
+/**
+ * Per-source callback that turns the swizzle of src into a read mask and
+ * forwards it to the mask callback stored in userdata (a struct
+ * read_write_mask_data).  Nothing is reported when the swizzle selects no
+ * register channel.  A relatively addressed source additionally reports a
+ * read of RC_FILE_ADDRESS index 0, channel X.
+ */
+static void reads_normal_callback(
+       void * userdata,
+       struct rc_instruction * fullinst,
+       struct rc_src_register * src)
+{
+       struct read_write_mask_data * data = userdata;
+       unsigned int readmask = 0;
+       unsigned int chan;
+
+       for (chan = 0; chan < 4; chan++) {
+               readmask |= 1 << GET_SWZ(src->Swizzle, chan);
+       }
+       /* Drop the bits contributed by swizzle values above W. */
+       readmask &= RC_MASK_XYZW;
+
+       if (!readmask) {
+               return;
+       }
+
+       data->Cb(data->UserData, fullinst, src->File, src->Index, readmask);
+
+       if (src->RelAddr) {
+               data->Cb(data->UserData, fullinst, RC_FILE_ADDRESS, 0,
+                                                               RC_MASK_X);
+       }
+}
+
+/**
+ * Records in refmasks that channel swz of source src is read.  Swizzle
+ * values of 4 and above are ignored.  When src is RC_PAIR_PRESUB_SRC the
+ * channel is recorded for every source of the presubtract operation, whose
+ * opcode is taken from the RGB sources for X/Y/Z and from the Alpha sources
+ * for W.
+ */
+static void pair_get_src_refmasks(unsigned int * refmasks,
+                                       struct rc_pair_instruction * inst,
+                                       unsigned int swz, unsigned int src)
+{
+       if (swz >= 4)
+               return;
+
+       if (swz == RC_SWIZZLE_X || swz == RC_SWIZZLE_Y || swz == RC_SWIZZLE_Z) {
+               if (src != RC_PAIR_PRESUB_SRC) {
+                       refmasks[src] |= 1 << swz;
+               } else {
+                       unsigned int n;
+                       int presub_srcs = rc_presubtract_src_reg_count(
+                                               inst->RGB.Src[src].Index);
+                       for (n = 0; n < presub_srcs; n++) {
+                               refmasks[n] |= 1 << swz;
+                       }
+               }
+       }
+
+       if (swz == RC_SWIZZLE_W) {
+               if (src != RC_PAIR_PRESUB_SRC) {
+                       refmasks[src] |= 1 << swz;
+               } else {
+                       unsigned int n;
+                       int presub_srcs = rc_presubtract_src_reg_count(
+                                               inst->Alpha.Src[src].Index);
+                       for (n = 0; n < presub_srcs; n++) {
+                               refmasks[n] |= 1 << swz;
+                       }
+               }
+       }
+}
+
+/**
+ * Reports the register reads of a pair instruction to cb.  The first three
+ * swizzle channels of each of the three RGB and Alpha arguments are
+ * collected into one read mask per source; then each used RGB source is
+ * reported with its XYZ part and each used Alpha source with RC_MASK_W.
+ */
+static void reads_pair(struct rc_instruction * fullinst, rc_read_write_mask_fn cb, void * userdata)
+{
+       struct rc_pair_instruction * pair = &fullinst->U.P;
+       unsigned int refmasks[3] = { 0, 0, 0 };
+       unsigned int arg;
+       unsigned int src;
+
+       for (arg = 0; arg < 3; ++arg) {
+               unsigned int chan;
+               for (chan = 0; chan < 3; ++chan) {
+                       unsigned int rgb_swz =
+                               GET_SWZ(pair->RGB.Arg[arg].Swizzle, chan);
+                       unsigned int alpha_swz =
+                               GET_SWZ(pair->Alpha.Arg[arg].Swizzle, chan);
+
+                       pair_get_src_refmasks(refmasks, pair, rgb_swz,
+                                               pair->RGB.Arg[arg].Source);
+                       pair_get_src_refmasks(refmasks, pair, alpha_swz,
+                                               pair->Alpha.Arg[arg].Source);
+               }
+       }
+
+       for (src = 0; src < 3; ++src) {
+               if (pair->RGB.Src[src].Used && (refmasks[src] & RC_MASK_XYZ)) {
+                       cb(userdata, fullinst, pair->RGB.Src[src].File,
+                          pair->RGB.Src[src].Index,
+                          refmasks[src] & RC_MASK_XYZ);
+               }
+
+               if (pair->Alpha.Src[src].Used && (refmasks[src] & RC_MASK_W)) {
+                       cb(userdata, fullinst, pair->Alpha.Src[src].File,
+                          pair->Alpha.Src[src].Index, RC_MASK_W);
+               }
+       }
+}
+
+/**
+ * Calls cb for every argument of the pair sub-instruction sub that selects
+ * at least one register channel.  An argument that reads the presubtract
+ * result triggers one call per presubtract source, taken from the RGB
+ * sources if the argument reads any RGB channel and from the Alpha sources
+ * otherwise.  Other arguments are resolved with rc_pair_get_src() and are
+ * skipped when that returns NULL.
+ */
+static void pair_sub_for_all_args(
+       struct rc_instruction * fullinst,
+       struct rc_pair_sub_instruction * sub,
+       rc_pair_read_arg_fn cb,
+       void * userdata)
+{
+       const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode);
+       int arg;
+
+       for (arg = 0; arg < info->NumSrcRegs; arg++) {
+               struct rc_pair_instruction_source * srcs;
+               unsigned int presub_op;
+               unsigned int presub_count;
+               unsigned int n;
+               unsigned int src_type;
+
+               src_type = rc_source_type_swz(sub->Arg[arg].Swizzle);
+               if (src_type == RC_SOURCE_NONE)
+                       continue;
+
+               if (sub->Arg[arg].Source != RC_PAIR_PRESUB_SRC) {
+                       struct rc_pair_instruction_source * src =
+                               rc_pair_get_src(&fullinst->U.P, &sub->Arg[arg]);
+                       if (src) {
+                               cb(userdata, fullinst, &sub->Arg[arg], src);
+                       }
+                       continue;
+               }
+
+               if (src_type & RC_SOURCE_RGB) {
+                       srcs = fullinst->U.P.RGB.Src;
+               } else {
+                       srcs = fullinst->U.P.Alpha.Src;
+               }
+               /* The presubtract slot stores the operation in Index. */
+               presub_op = srcs[RC_PAIR_PRESUB_SRC].Index;
+               presub_count = rc_presubtract_src_reg_count(presub_op);
+               for (n = 0; n < presub_count; n++) {
+                       cb(userdata, fullinst, &sub->Arg[arg], &srcs[n]);
+               }
+       }
+}
+
+/* This function calls the callback function (cb) for each source used by
+ * the instruction.
+ *
+ * Sources whose file is RC_FILE_NONE are skipped.  A source that reads the
+ * presubtract result (RC_FILE_PRESUB) is reported as the source registers
+ * of the presubtract operation instead.
+ *
+ * Only normal instructions are supported; for any other instruction type
+ * the function asserts and returns without calling cb.
+ * */
+void rc_for_all_reads_src(
+       struct rc_instruction * inst,
+       rc_read_src_fn cb,
+       void * userdata)
+{
+       const struct rc_opcode_info * opcode;
+
+       /* This function only works with normal instructions. */
+       if (inst->Type != RC_INSTRUCTION_NORMAL) {
+               assert(0);
+               return;
+       }
+
+       /* Look up the opcode only after the type check: U.I is not the
+        * active member for other instruction types, so its Opcode field
+        * must not be fed to rc_get_opcode_info() for them. */
+       opcode = rc_get_opcode_info(inst->U.I.Opcode);
+
+       for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
+
+               if (inst->U.I.SrcReg[src].File == RC_FILE_NONE)
+                       continue;
+
+               if (inst->U.I.SrcReg[src].File == RC_FILE_PRESUB) {
+                       unsigned int i;
+                       unsigned int srcp_regs = rc_presubtract_src_reg_count(
+                                               inst->U.I.PreSub.Opcode);
+                       for( i = 0; i < srcp_regs; i++) {
+                               cb(userdata, inst, &inst->U.I.PreSub.SrcReg[i]);
+                       }
+               } else {
+                       cb(userdata, inst, &inst->U.I.SrcReg[src]);
+               }
+       }
+}
+
+/**
+ * This function calls the callback function (cb) for each arg of the RGB and
+ * alpha components, RGB first.  It only works with pair instructions; for
+ * any other instruction type it asserts and does nothing.
+ */
+void rc_pair_for_all_reads_arg(
+       struct rc_instruction * inst,
+       rc_pair_read_arg_fn cb,
+       void * userdata)
+{
+       if (inst->Type == RC_INSTRUCTION_PAIR) {
+               pair_sub_for_all_args(inst, &inst->U.P.RGB, cb, userdata);
+               pair_sub_for_all_args(inst, &inst->U.P.Alpha, cb, userdata);
+       } else {
+               /* This function only works with pair instructions. */
+               assert(0);
+       }
+}
+
+/**
+ * Calls a callback function for all register reads.
+ *
+ * This is conservative, i.e. if the same register is referenced multiple times,
+ * the callback may also be called multiple times.
+ * Also, the writemask of the instruction is not taken into account.
+ */
+void rc_for_all_reads_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata)
+{
+       struct read_write_mask_data wrapped;
+
+       if (inst->Type != RC_INSTRUCTION_NORMAL) {
+               reads_pair(inst, cb, userdata);
+               return;
+       }
+
+       /* Normal instructions are walked per source; the wrapper turns each
+        * source into a (file, index, mask) report. */
+       wrapped.Cb = cb;
+       wrapped.UserData = userdata;
+       rc_for_all_reads_src(inst, reads_normal_callback, &wrapped);
+}
+
+
+
+/**
+ * Reports the writes of a normal instruction: the destination register
+ * (when the opcode has one and the writemask is non-zero) and, if
+ * WriteALUResult is set, channel X of the special ALU result register.
+ */
+static void writes_normal(struct rc_instruction * fullinst, rc_read_write_mask_fn cb, void * userdata)
+{
+       struct rc_sub_instruction * sub = &fullinst->U.I;
+       const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode);
+
+       if (info->HasDstReg && sub->DstReg.WriteMask) {
+               cb(userdata, fullinst, sub->DstReg.File, sub->DstReg.Index,
+                  sub->DstReg.WriteMask);
+       }
+
+       if (sub->WriteALUResult) {
+               cb(userdata, fullinst, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT,
+                  RC_MASK_X);
+       }
+}
+
+/**
+ * Reports the writes of a pair instruction: the RGB destination with its
+ * writemask, the Alpha destination as RC_MASK_W, and, if WriteALUResult is
+ * set, channel X of the special ALU result register.  Both destinations
+ * are reported as RC_FILE_TEMPORARY.
+ */
+static void writes_pair(struct rc_instruction * fullinst, rc_read_write_mask_fn cb, void * userdata)
+{
+       struct rc_pair_instruction * pair = &fullinst->U.P;
+
+       if (pair->RGB.WriteMask) {
+               cb(userdata, fullinst, RC_FILE_TEMPORARY, pair->RGB.DestIndex,
+                  pair->RGB.WriteMask);
+       }
+
+       if (pair->Alpha.WriteMask) {
+               cb(userdata, fullinst, RC_FILE_TEMPORARY, pair->Alpha.DestIndex,
+                  RC_MASK_W);
+       }
+
+       if (pair->WriteALUResult) {
+               cb(userdata, fullinst, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT,
+                  RC_MASK_X);
+       }
+}
+
+/**
+ * Calls a callback function for all register writes in the instruction,
+ * reporting writemasks to the callback function.
+ *
+ * \warning Does not report output registers for paired instructions!
+ */
+void rc_for_all_writes_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata)
+{
+       if (inst->Type != RC_INSTRUCTION_NORMAL) {
+               writes_pair(inst, cb, userdata);
+               return;
+       }
+       writes_normal(inst, cb, userdata);
+}
+
+
+/* Bundles a per-channel callback with its userdata so that it can be driven
+ * from a mask callback (see mask_to_chan_cb()). */
+struct mask_to_chan_data {
+       /* Opaque pointer handed back to Fn on every call. */
+       void * UserData;
+       /* Callback invoked once per channel set in a mask. */
+       rc_read_write_chan_fn Fn;
+};
+
+/**
+ * Mask callback that fans out to the per-channel callback stored in data
+ * (a struct mask_to_chan_data): one call for each of the four channels
+ * whose bit is set in mask, in ascending channel order.
+ */
+static void mask_to_chan_cb(void * data, struct rc_instruction * inst,
+               rc_register_file file, unsigned int index, unsigned int mask)
+{
+       struct mask_to_chan_data * wrapped = data;
+       unsigned int chan;
+
+       for (chan = 0; chan < 4; ++chan) {
+               if (!GET_BIT(mask, chan))
+                       continue;
+               wrapped->Fn(wrapped->UserData, inst, file, index, chan);
+       }
+}
+
+/**
+ * Calls a callback function for all sourced register channels.
+ *
+ * This is conservative, i.e. channels may be called multiple times,
+ * and the writemask of the instruction is not taken into account.
+ */
+void rc_for_all_reads_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata)
+{
+       struct mask_to_chan_data wrapped;
+
+       wrapped.Fn = cb;
+       wrapped.UserData = userdata;
+       rc_for_all_reads_mask(inst, &mask_to_chan_cb, &wrapped);
+}
+
+/**
+ * Calls a callback function for all written register channels.
+ *
+ * \warning Does not report output registers for paired instructions!
+ */
+void rc_for_all_writes_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata)
+{
+       struct mask_to_chan_data wrapped;
+
+       wrapped.Fn = cb;
+       wrapped.UserData = userdata;
+       rc_for_all_writes_mask(inst, &mask_to_chan_cb, &wrapped);
+}
+
+/**
+ * Remap the registers referenced by a normal (non-paired) instruction.
+ *
+ * \p cb is called for the destination register (if the opcode has one) and
+ * then for each source register; whatever file/index the callback leaves
+ * behind is stored back into the instruction.  A source that reads the
+ * presubtract result has the presubtract operation's own sources remapped
+ * instead, and this happens at most once per instruction.
+ */
+static void remap_normal_instruction(struct rc_instruction * fullinst,
+               rc_remap_register_fn cb, void * userdata)
+{
+       struct rc_sub_instruction * sub = &fullinst->U.I;
+       const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode);
+       unsigned int presub_done = 0;
+       unsigned int s;
+
+       if (info->HasDstReg) {
+               rc_register_file dst_file = sub->DstReg.File;
+               unsigned int dst_index = sub->DstReg.Index;
+
+               cb(userdata, fullinst, &dst_file, &dst_index);
+
+               sub->DstReg.File = dst_file;
+               sub->DstReg.Index = dst_index;
+       }
+
+       for (s = 0; s < info->NumSrcRegs; ++s) {
+               rc_register_file file = sub->SrcReg[s].File;
+               unsigned int index = sub->SrcReg[s].Index;
+               unsigned int presub_count;
+               unsigned int p;
+
+               /* Ordinary source: remap it in place and move on. */
+               if (file != RC_FILE_PRESUB) {
+                       cb(userdata, fullinst, &file, &index);
+
+                       sub->SrcReg[s].File = file;
+                       sub->SrcReg[s].Index = index;
+                       continue;
+               }
+
+               presub_count = rc_presubtract_src_reg_count(sub->PreSub.Opcode);
+
+               /* Several sources may read the presubtract result; its
+                * operands must nevertheless be remapped only once. */
+               if (presub_done)
+                       continue;
+
+               for (p = 0; p < presub_count; p++) {
+                       file = sub->PreSub.SrcReg[p].File;
+                       index = sub->PreSub.SrcReg[p].Index;
+                       cb(userdata, fullinst, &file, &index);
+                       sub->PreSub.SrcReg[p].File = file;
+                       sub->PreSub.SrcReg[p].Index = index;
+               }
+               presub_done = 1;
+       }
+}
+
+/**
+ * Remap the registers referenced by a paired (RGB + Alpha) instruction.
+ *
+ * \p cb is called, in this order, for the RGB destination, the Alpha
+ * destination (each only when its writemask is non-zero), and then for
+ * source slots 0..2, RGB before Alpha, skipping slots that are not marked
+ * as used.  Destinations are presented to the callback as
+ * RC_FILE_TEMPORARY and only the index it returns is stored back; for
+ * sources both file and index are stored back.
+ */
+static void remap_pair_instruction(struct rc_instruction * fullinst,
+               rc_remap_register_fn cb, void * userdata)
+{
+       struct rc_pair_instruction * pair = &fullinst->U.P;
+       rc_register_file file;
+       unsigned int index;
+       unsigned int slot;
+
+       if (pair->RGB.WriteMask) {
+               file = RC_FILE_TEMPORARY;
+               index = pair->RGB.DestIndex;
+               cb(userdata, fullinst, &file, &index);
+               pair->RGB.DestIndex = index;
+       }
+
+       if (pair->Alpha.WriteMask) {
+               file = RC_FILE_TEMPORARY;
+               index = pair->Alpha.DestIndex;
+               cb(userdata, fullinst, &file, &index);
+               pair->Alpha.DestIndex = index;
+       }
+
+       for (slot = 0; slot < 3; ++slot) {
+               if (pair->RGB.Src[slot].Used) {
+                       file = pair->RGB.Src[slot].File;
+                       index = pair->RGB.Src[slot].Index;
+                       cb(userdata, fullinst, &file, &index);
+                       pair->RGB.Src[slot].File = file;
+                       pair->RGB.Src[slot].Index = index;
+               }
+
+               if (pair->Alpha.Src[slot].Used) {
+                       file = pair->Alpha.Src[slot].File;
+                       index = pair->Alpha.Src[slot].Index;
+                       cb(userdata, fullinst, &file, &index);
+                       pair->Alpha.Src[slot].File = file;
+                       pair->Alpha.Src[slot].Index = index;
+               }
+       }
+}
+
+
+/**
+ * Remap all register accesses of \p inst according to \p cb.
+ *
+ * \p cb is called for each referenced register (both read and written)
+ * with pointers to its file and index (\ref pfile and \ref pindex); the
+ * instruction is updated with whatever values the callback leaves there.
+ * Normal and paired instructions are dispatched to their own helpers.
+ */
+void rc_remap_registers(struct rc_instruction * inst, rc_remap_register_fn cb, void * userdata)
+{
+       if (inst->Type != RC_INSTRUCTION_NORMAL) {
+               remap_pair_instruction(inst, cb, userdata);
+               return;
+       }
+
+       remap_normal_instruction(inst, cb, userdata);
+}
+
+/**
+ * Per-nesting-level bookkeeping used while scanning for readers.
+ * One entry is filled in by push_branch_mask() and consumed by
+ * pop_branch_mask().
+ */
+struct branch_write_mask {
+       /* AliveWriteMask as it was when the branch level was entered. */
+       unsigned int IfWriteMask:4;
+       /* AliveWriteMask as it was when the ELSE of this level was reached,
+        * i.e. at the end of the IF block. */
+       unsigned int ElseWriteMask:4;
+       /* Set once an ELSE has been seen at this level. */
+       unsigned int HasElse:1;
+};
+
+/**
+ * Either kind of read callback: I for normal instructions, P for paired
+ * instructions.
+ * NOTE(review): nothing in the visible part of this file references this
+ * union -- confirm whether it is still needed.
+ */
+union get_readers_read_cb {
+       rc_read_src_fn I;
+       rc_pair_read_arg_fn P;
+};
+
+/**
+ * Working state of one rc_get_readers() / rc_get_readers_sub() query.
+ * It is passed as the userdata pointer to the internal read/write callbacks.
+ */
+struct get_readers_callback_data {
+       /* Compiler that owns the program and the memory pool for readers. */
+       struct radeon_compiler * C;
+       /* Result structure supplied by the caller. */
+       struct rc_reader_data * ReaderData;
+       /* Optional user callbacks (may be NULL). */
+       rc_read_src_fn ReadNormalCB;
+       rc_pair_read_arg_fn ReadPairCB;
+       rc_read_write_mask_fn WriteCB;
+       /* Register (file, index, channels) written by the writer being
+        * examined. */
+       rc_register_file DstFile;
+       unsigned int DstIndex;
+       unsigned int DstMask;
+       /* Channels of DstMask that have not yet been overwritten by a later
+        * instruction on the path scanned so far. */
+       unsigned int AliveWriteMask;
+       /* For convenience, this is indexed starting at 1: entry [depth]
+        * belongs to the branch nested at that depth. */
+       struct branch_write_mask BranchMasks[R500_PFS_MAX_BRANCH_DEPTH_FULL + 1];
+};
+
+/**
+ * Append a new entry to data->Readers (growing the array from \p pool by
+ * one slot if needed) and fill in its instruction and mask.
+ *
+ * \return the freshly appended entry; the caller fills in the U member.
+ */
+static struct rc_reader * add_reader(
+       struct memory_pool * pool,
+       struct rc_reader_data * data,
+       struct rc_instruction * inst,
+       unsigned int mask)
+{
+       struct rc_reader * slot;
+
+       memory_pool_array_reserve(pool, struct rc_reader, data->Readers,
+                               data->ReaderCount, data->ReadersReserved, 1);
+
+       slot = &data->Readers[data->ReaderCount];
+       data->ReaderCount++;
+
+       slot->WriteMask = mask;
+       slot->Inst = inst;
+       return slot;
+}
+
+/**
+ * Record a normal instruction as a reader, remembering which of its source
+ * registers performs the read.
+ */
+static void add_reader_normal(
+       struct memory_pool * pool,
+       struct rc_reader_data * data,
+       struct rc_instruction * inst,
+       unsigned int mask,
+       struct rc_src_register * src)
+{
+       struct rc_reader * entry;
+
+       entry = add_reader(pool, data, inst, mask);
+       entry->U.I.Src = src;
+}
+
+
+/**
+ * Record a paired instruction as a reader, remembering both the argument
+ * and the source slot that perform the read.
+ */
+static void add_reader_pair(
+       struct memory_pool * pool,
+       struct rc_reader_data * data,
+       struct rc_instruction * inst,
+       unsigned int mask,
+       struct rc_pair_instruction_arg * arg,
+       struct rc_pair_instruction_source * src)
+{
+       struct rc_reader * entry;
+
+       entry = add_reader(pool, data, inst, mask);
+       entry->U.P.Arg = arg;
+       entry->U.P.Src = src;
+}
+
+/**
+ * Common part of the normal and paired read callbacks: decide whether a
+ * source (\p file, \p index, \p swizzle) reads the register written by the
+ * writer under examination, and update the abort bookkeeping.
+ *
+ * \return the channels of the still-alive write that this source reads, or
+ * RC_MASK_NONE if it reads none of them (or uses relative addressing, in
+ * which case the Abort flag is set as well).
+ */
+static unsigned int get_readers_read_callback(
+       struct get_readers_callback_data * cb_data,
+       unsigned int has_rel_addr,
+       rc_register_file file,
+       unsigned int index,
+       unsigned int swizzle)
+{
+       struct rc_reader_data * rd = cb_data->ReaderData;
+       unsigned int overlap;
+       unsigned int channels_read;
+
+       if (has_rel_addr) {
+               rd->Abort = 1;
+               return RC_MASK_NONE;
+       }
+
+       overlap = rc_src_reads_dst_mask(file, index, swizzle,
+               cb_data->DstFile, cb_data->DstIndex, cb_data->AliveWriteMask);
+       if (overlap == RC_MASK_NONE)
+               return RC_MASK_NONE;
+
+       /* From here on this source is known to read from the same register
+        * that cb_data->ReaderData->Writer writes to. */
+       channels_read = rc_swizzle_to_writemask(swizzle);
+
+       if (rd->AbortOnRead & channels_read) {
+               rd->Abort = 1;
+               return overlap;
+       }
+
+       if (rd->LoopDepth > 0)
+               rd->AbortOnWrite |= channels_read & cb_data->AliveWriteMask;
+
+       /* XXX The behavior in this case should be configurable. */
+       /* The source also reads channels that are not (or no longer)
+        * provided by the writer. */
+       if ((channels_read & ~cb_data->AliveWriteMask) != 0)
+               rd->Abort = 1;
+
+       return overlap;
+}
+
+/**
+ * Read callback for paired instructions: determines whether the given
+ * argument/source of \p inst reads the writer under examination and, if so,
+ * notifies the user callback and records the reader.
+ */
+static void get_readers_pair_read_callback(
+       void * userdata,
+       struct rc_instruction * inst,
+       struct rc_pair_instruction_arg * arg,
+       struct rc_pair_instruction_source * src)
+{
+       struct get_readers_callback_data * state = userdata;
+       unsigned int overlap;
+
+       /* Pair Instructions don't use RelAddr, hence the 0. */
+       overlap = get_readers_read_callback(state, 0,
+                               src->File, src->Index, arg->Swizzle);
+       if (overlap == RC_MASK_NONE)
+               return;
+
+       /* The user callback runs before the abort check below. */
+       if (state->ReadPairCB)
+               state->ReadPairCB(state->ReaderData, inst, arg, src);
+
+       if (!(state->ReaderData->ExitOnAbort && state->ReaderData->Abort))
+               add_reader_pair(&state->C->Pool, state->ReaderData, inst,
+                               overlap, arg, src);
+}
+
+/**
+ * Read callback for normal instructions: determines whether \p src of
+ * \p inst reads the value written by userdata->ReaderData->Writer and, if
+ * so, notifies the user callback and records the reader.
+ */
+static void get_readers_normal_read_callback(
+       void * userdata,
+       struct rc_instruction * inst,
+       struct rc_src_register * src)
+{
+       struct get_readers_callback_data * state = userdata;
+       unsigned int overlap;
+
+       overlap = get_readers_read_callback(state,
+                       src->RelAddr, src->File, src->Index, src->Swizzle);
+       if (overlap == RC_MASK_NONE)
+               return;
+
+       /* The user callback could potentially clear ReaderData->Abort, so it
+        * has to be called before the abort check below. */
+       if (state->ReadNormalCB)
+               state->ReadNormalCB(state->ReaderData, inst, src);
+
+       if (!(state->ReaderData->ExitOnAbort && state->ReaderData->Abort))
+               add_reader_normal(&state->C->Pool, state->ReaderData, inst,
+                               overlap, src);
+}
+
+/**
+ * Write callback used while scanning for readers.  It tracks when
+ * userdata->ReaderData->Writer becomes dead (i.e. all components of its
+ * destination register have been overwritten by other instructions) and
+ * then forwards the write to the user's write callback, if any.
+ */
+static void get_readers_write_callback(
+       void *userdata,
+       struct rc_instruction * inst,
+       rc_register_file file,
+       unsigned int index,
+       unsigned int mask)
+{
+       struct get_readers_callback_data * state = userdata;
+       struct rc_reader_data * rd = state->ReaderData;
+
+       if (file == state->DstFile && index == state->DstIndex) {
+               const unsigned int overwritten = mask & state->DstMask;
+
+               /* Overwritten channels are no longer provided by the writer
+                * and no longer trigger an abort when read. */
+               rd->AbortOnRead &= ~overwritten;
+               state->AliveWriteMask &= ~overwritten;
+
+               if (rd->AbortOnWrite & overwritten)
+                       rd->Abort = 1;
+       }
+
+       if (state->WriteCB)
+               state->WriteCB(rd, inst, file, index, mask);
+}
+
+/**
+ * Enter a new branch level: bump \p *branch_depth and remember the current
+ * AliveWriteMask in the slot for that level.
+ *
+ * If the new depth exceeds R500_PFS_MAX_BRANCH_DEPTH_FULL no slot exists
+ * for it; the depth stays incremented and the Abort flag is set instead.
+ */
+static void push_branch_mask(
+       struct get_readers_callback_data * d,
+       unsigned int * branch_depth)
+{
+       const unsigned int level = ++(*branch_depth);
+
+       if (level <= R500_PFS_MAX_BRANCH_DEPTH_FULL) {
+               d->BranchMasks[level].IfWriteMask = d->AliveWriteMask;
+               return;
+       }
+
+       d->ReaderData->Abort = 1;
+}
+
+/**
+ * Leave the branch level \p *branch_depth: merge what was learned inside
+ * the branch back into AliveWriteMask / AbortOnRead, clear the level's slot
+ * and decrement \p *branch_depth.
+ *
+ * Levels deeper than R500_PFS_MAX_BRANCH_DEPTH_FULL have no slot in
+ * d->BranchMasks; for those only the depth counter is unwound.
+ */
+static void pop_branch_mask(
+       struct get_readers_callback_data * d,
+       unsigned int * branch_depth)
+{
+       struct branch_write_mask * masks;
+
+       /* push_branch_mask() keeps counting past the end of BranchMasks (it
+        * only sets the Abort flag), so such a level must not be used as an
+        * array index here: that would read and memset memory beyond the
+        * array. */
+       if (*branch_depth > R500_PFS_MAX_BRANCH_DEPTH_FULL) {
+               (*branch_depth)--;
+               return;
+       }
+
+       masks = &d->BranchMasks[*branch_depth];
+
+       if (masks->HasElse) {
+               /* Abort on read for components that were written in the IF
+                * block. */
+               d->ReaderData->AbortOnRead |=
+                               masks->IfWriteMask & ~masks->ElseWriteMask;
+               /* Abort on read for components that were written in the ELSE
+                * block. */
+               d->ReaderData->AbortOnRead |=
+                               masks->ElseWriteMask & ~d->AliveWriteMask;
+
+               /* Start from the mask at branch entry and flip only the
+                * bits that changed in both the IF block (If ^ Else) and
+                * the ELSE block (If ^ Alive). */
+               d->AliveWriteMask = masks->IfWriteMask
+                       ^ ((masks->IfWriteMask ^ masks->ElseWriteMask)
+                       & (masks->IfWriteMask ^ d->AliveWriteMask));
+       } else {
+               /* No ELSE: components written inside the block abort on
+                * read, and the alive mask reverts to its value at entry. */
+               d->ReaderData->AbortOnRead |=
+                               masks->IfWriteMask & ~d->AliveWriteMask;
+               d->AliveWriteMask = masks->IfWriteMask;
+
+       }
+       memset(masks, 0, sizeof(struct branch_write_mask));
+       (*branch_depth)--;
+}
+
+/**
+ * Collect the readers of one register write.
+ *
+ * Has the signature of rc_read_write_mask_fn so that it can be used as the
+ * callback of rc_for_all_writes_mask().  \p userdata must point to a
+ * get_readers_callback_data.  The per-write state is reset, then the
+ * instructions following \p writer are scanned: flow control opcodes update
+ * the branch/loop bookkeeping, reads are reported through the read
+ * callbacks and writes through get_readers_write_callback().
+ *
+ * The scan stops at the end of the program, when Abort is set while
+ * ExitOnAbort is enabled, or when no channel of the write is alive any
+ * more outside of any tracked branch.
+ *
+ * @param writer instruction performing the write
+ * @param dst_file register file written by \p writer
+ * @param dst_index register index written by \p writer
+ * @param dst_mask channels written; nothing is scanned if this is zero
+ */
+static void get_readers_for_single_write(
+       void * userdata,
+       struct rc_instruction * writer,
+       rc_register_file dst_file,
+       unsigned int dst_index,
+       unsigned int dst_mask)
+{
+       struct rc_instruction * tmp;
+       unsigned int branch_depth = 0;
+       struct rc_instruction * endloop = NULL;
+       unsigned int abort_on_read_at_endloop = 0;
+       struct get_readers_callback_data * d = userdata;
+
+       /* Reset everything that is specific to a single write. */
+       d->ReaderData->Writer = writer;
+       d->ReaderData->AbortOnRead = 0;
+       d->ReaderData->AbortOnWrite = 0;
+       d->ReaderData->LoopDepth = 0;
+       d->ReaderData->InElse = 0;
+       d->DstFile = dst_file;
+       d->DstIndex = dst_index;
+       d->DstMask = dst_mask;
+       d->AliveWriteMask = dst_mask;
+       memset(d->BranchMasks, 0, sizeof(d->BranchMasks));
+
+       if (!dst_mask)
+               return;
+
+       for(tmp = writer->Next; tmp != &d->C->Program.Instructions;
+                                                       tmp = tmp->Next){
+               rc_opcode opcode = rc_get_flow_control_inst(tmp);
+               switch(opcode) {
+               case RC_OPCODE_BGNLOOP:
+                       d->ReaderData->LoopDepth++;
+                       push_branch_mask(d, &branch_depth);
+                       break;
+               case RC_OPCODE_ENDLOOP:
+                       if (d->ReaderData->LoopDepth > 0) {
+                               d->ReaderData->LoopDepth--;
+                               if (d->ReaderData->LoopDepth == 0) {
+                                       d->ReaderData->AbortOnWrite = 0;
+                               }
+                               pop_branch_mask(d, &branch_depth);
+                       } else {
+                               /* Here we have reached an ENDLOOP without
+                                * seeing its BGNLOOP.  This means that
+                                * the writer was written inside of a loop,
+                                * so it could have readers that are above it
+                                * (i.e. they have a lower IP).  To find these
+                                * readers we jump to the BGNLOOP instruction
+                                * and check each instruction until we get
+                                * back to the writer.
+                                */
+                               endloop = tmp;
+                               tmp = rc_match_endloop(tmp);
+                               if (!tmp) {
+                                       rc_error(d->C, "Failed to match endloop.\n");
+                                       d->ReaderData->Abort = 1;
+                                       return;
+                               }
+                               /* While scanning the part of the loop above
+                                * the writer, any read of a still-alive
+                                * channel aborts; the previous AbortOnRead
+                                * is restored once the writer is reached. */
+                               abort_on_read_at_endloop = d->ReaderData->AbortOnRead;
+                               d->ReaderData->AbortOnRead |= d->AliveWriteMask;
+                               continue;
+                       }
+                       break;
+               case RC_OPCODE_IF:
+                       push_branch_mask(d, &branch_depth);
+                       break;
+               case RC_OPCODE_ELSE:
+                       if (branch_depth == 0) {
+                               /* ELSE without a tracked IF: instructions
+                                * are skipped until the ENDIF at depth 0. */
+                               d->ReaderData->InElse = 1;
+                       } else if (branch_depth <= R500_PFS_MAX_BRANCH_DEPTH_FULL) {
+                               /* Levels beyond the array have no slot
+                                * (push_branch_mask() only sets Abort for
+                                * them), so they must not be indexed. */
+                               unsigned int temp_mask = d->AliveWriteMask;
+                               d->AliveWriteMask =
+                                       d->BranchMasks[branch_depth].IfWriteMask;
+                               d->BranchMasks[branch_depth].ElseWriteMask =
+                                                               temp_mask;
+                               d->BranchMasks[branch_depth].HasElse = 1;
+                       }
+                       break;
+               case RC_OPCODE_ENDIF:
+                       if (branch_depth == 0) {
+                               /* ENDIF without a tracked IF: from here on
+                                * reads of the still-alive channels abort. */
+                               d->ReaderData->AbortOnRead = d->AliveWriteMask;
+                               d->ReaderData->InElse = 0;
+                       }
+                       else {
+                               pop_branch_mask(d, &branch_depth);
+                       }
+                       break;
+               default:
+                       break;
+               }
+
+               if (d->ReaderData->InElse)
+                       continue;
+
+               if (tmp->Type == RC_INSTRUCTION_NORMAL) {
+                       rc_for_all_reads_src(tmp,
+                               get_readers_normal_read_callback, d);
+               } else {
+                       rc_pair_for_all_reads_arg(tmp,
+                               get_readers_pair_read_callback, d);
+               }
+
+               /* This can happen when we jump from an ENDLOOP to BGNLOOP */
+               if (tmp == writer) {
+                       /* Resume after the ENDLOOP that triggered the jump;
+                        * the writer's own writes are not processed. */
+                       tmp = endloop;
+                       endloop = NULL;
+                       d->ReaderData->AbortOnRead = abort_on_read_at_endloop;
+                       continue;
+               }
+               rc_for_all_writes_mask(tmp, get_readers_write_callback, d);
+
+               if (d->ReaderData->ExitOnAbort && d->ReaderData->Abort)
+                       return;
+
+               /* Outside of any tracked branch and every channel has been
+                * overwritten: no later instruction can read this write. */
+               if (branch_depth == 0 && !d->AliveWriteMask)
+                       return;
+       }
+}
+
+/**
+ * Prepare \p d for a new query: attach the compiler, the caller's result
+ * structure and the user callbacks, and reset the result structure's Abort
+ * flag and reader list.
+ */
+static void init_get_readers_callback_data(
+       struct get_readers_callback_data * d,
+       struct rc_reader_data * reader_data,
+       struct radeon_compiler * c,
+       rc_read_src_fn read_normal_cb,
+       rc_pair_read_arg_fn read_pair_cb,
+       rc_read_write_mask_fn write_cb)
+{
+       /* Wire up the query state. */
+       d->C = c;
+       d->ReaderData = reader_data;
+       d->ReadNormalCB = read_normal_cb;
+       d->ReadPairCB = read_pair_cb;
+       d->WriteCB = write_cb;
+
+       /* Start with an empty result. */
+       reader_data->Readers = NULL;
+       reader_data->ReadersReserved = 0;
+       reader_data->ReaderCount = 0;
+       reader_data->Abort = 0;
+}
+
+/**
+ * Build the list of readers of \p writer in \p data.
+ *
+ * The search is run once for every register write that
+ * rc_for_all_writes_mask() reports for \p writer.  It aborts (sets the flag
+ * data->Abort) when it encounters an instruction that reads a value coming
+ * from \p writer and also from a different instruction.  Some examples:
+ *
+ * writer = instruction 0;
+ * 0 MOV TEMP[0].xy, TEMP[1].xy
+ * 1 MOV TEMP[0].zw, TEMP[2].xy
+ * 2 MOV TEMP[3], TEMP[0]
+ * The Abort flag will be set on instruction 2, because it reads values
+ * written by instructions 0 and 1.
+ *
+ * writer = instruction 1;
+ * 0 IF TEMP[0].x
+ * 1 MOV TEMP[1], TEMP[2]
+ * 2 ELSE
+ * 3 MOV TEMP[1], TEMP[2]
+ * 4 ENDIF
+ * 5 MOV TEMP[3], TEMP[1]
+ * The Abort flag will be set on instruction 5, because it could read the
+ * value written by either instruction 1 or 3, depending on the jump
+ * decision made at instruction 0.
+ *
+ * writer = instruction 0;
+ * 0 MOV TEMP[0], TEMP[1]
+ * 1 BGNLOOP
+ * 2 ADD TEMP[0], TEMP[0], none.1
+ * 3 ENDLOOP
+ * The Abort flag will be set on instruction 2, because in the first
+ * iteration of the loop it reads the value written by instruction 0 and in
+ * all other iterations it reads the value it wrote itself.
+ *
+ * @param read_normal_cb Called for every source of a normal instruction
+ * that has been determined to read from writer.  May be NULL.
+ * @param read_pair_cb Same as read_normal_cb, for paired instructions.
+ * May be NULL.
+ * @param write_cb Called for every register write of the instructions
+ * scanned after writer.  May be NULL.
+ */
+void rc_get_readers(
+       struct radeon_compiler * c,
+       struct rc_instruction * writer,
+       struct rc_reader_data * data,
+       rc_read_src_fn read_normal_cb,
+       rc_pair_read_arg_fn read_pair_cb,
+       rc_read_write_mask_fn write_cb)
+{
+       struct get_readers_callback_data state;
+
+       init_get_readers_callback_data(&state, data, c,
+                               read_normal_cb, read_pair_cb, write_cb);
+
+       rc_for_all_writes_mask(writer, &get_readers_for_single_write, &state);
+}
+
+/**
+ * Variant of rc_get_readers() for one half of a paired instruction: only
+ * the temporary register written by \p sub_writer (its DestIndex and
+ * WriteMask) is examined.  \p data is reset even when \p sub_writer writes
+ * nothing.
+ */
+void rc_get_readers_sub(
+       struct radeon_compiler * c,
+       struct rc_instruction * writer,
+       struct rc_pair_sub_instruction * sub_writer,
+       struct rc_reader_data * data,
+       rc_read_src_fn read_normal_cb,
+       rc_pair_read_arg_fn read_pair_cb,
+       rc_read_write_mask_fn write_cb)
+{
+       struct get_readers_callback_data state;
+
+       init_get_readers_callback_data(&state, data, c,
+                               read_normal_cb, read_pair_cb, write_cb);
+
+       if (!sub_writer->WriteMask)
+               return;
+
+       get_readers_for_single_write(&state, writer, RC_FILE_TEMPORARY,
+               sub_writer->DestIndex, sub_writer->WriteMask);
+}
diff --git a/src/gallium/drivers/r300/compiler/radeon_dataflow.h b/src/gallium/drivers/r300/compiler/radeon_dataflow.h
new file mode 100644 (file)
index 0000000..d8a6272
--- /dev/null
@@ -0,0 +1,134 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ * Copyright 2010 Tom Stellard <tstellar@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef RADEON_DATAFLOW_H
+#define RADEON_DATAFLOW_H
+
+#include "radeon_program_constants.h"
+
+struct radeon_compiler;
+struct rc_instruction;
+struct rc_swizzle_caps;
+struct rc_src_register;
+struct rc_pair_instruction_arg;
+struct rc_pair_instruction_source;
+struct rc_pair_sub_instruction;
+struct rc_compiler;
+
+
+/**
+ * Help analyze and modify the register accesses of instructions.
+ *
+ * Every callback type below receives the caller's \c userdata pointer as
+ * its first argument and the instruction being examined as its second.
+ */
+/*@{*/
+/* Per-channel callback: one call per register channel (chan). */
+typedef void (*rc_read_write_chan_fn)(void * userdata, struct rc_instruction * inst,
+                       rc_register_file file, unsigned int index, unsigned int chan);
+void rc_for_all_reads_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata);
+void rc_for_all_writes_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata);
+
+/* Per-register callback: one call per register, with a channel mask. */
+typedef void (*rc_read_write_mask_fn)(void * userdata, struct rc_instruction * inst,
+                       rc_register_file file, unsigned int index, unsigned int mask);
+void rc_for_all_reads_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata);
+void rc_for_all_writes_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata);
+
+/* Callback receiving a pointer to a source register of a normal
+ * instruction. */
+typedef void (*rc_read_src_fn)(void * userdata, struct rc_instruction * inst,
+                       struct rc_src_register * src);
+void rc_for_all_reads_src(struct rc_instruction * inst, rc_read_src_fn cb,
+                       void * userdata);
+
+/* Callback receiving an argument of a paired instruction together with the
+ * source slot it refers to. */
+typedef void (*rc_pair_read_arg_fn)(void * userdata,
+       struct rc_instruction * inst, struct rc_pair_instruction_arg * arg,
+       struct rc_pair_instruction_source * src);
+void rc_pair_for_all_reads_arg(struct rc_instruction * inst,
+                                       rc_pair_read_arg_fn cb, void * userdata);
+
+/* Remapping callback: may rewrite *pfile and *pindex in place; the values
+ * it leaves there are written back to the instruction. */
+typedef void (*rc_remap_register_fn)(void * userdata, struct rc_instruction * inst,
+                       rc_register_file * pfile, unsigned int * pindex);
+void rc_remap_registers(struct rc_instruction * inst, rc_remap_register_fn cb, void * userdata);
+/*@}*/
+
+/**
+ * One entry of the reader list built by rc_get_readers() and
+ * rc_get_readers_sub().
+ */
+struct rc_reader {
+       /* The instruction that performs the read. */
+       struct rc_instruction * Inst;
+       /* Channels of the writer's destination that this reader reads. */
+       unsigned int WriteMask;
+       /* Which operand performs the read: I is filled in for normal
+        * instructions, P for paired instructions. */
+       union {
+               struct {
+                       struct rc_src_register * Src;
+               } I;
+               struct {
+                       struct rc_pair_instruction_arg * Arg;
+                       struct rc_pair_instruction_source * Src;
+               } P;
+       } U;
+};
+
+/**
+ * Input/output structure of rc_get_readers() and rc_get_readers_sub().
+ * Abort and the reader list are reset at the start of each call; the scan
+ * state (AbortOnRead, AbortOnWrite, LoopDepth, InElse, Writer) is reset for
+ * every register write that is examined.
+ */
+struct rc_reader_data {
+       /* Set when the readers of the writer cannot be determined exactly. */
+       unsigned int Abort;
+       /* Channel mask: reading any of these channels sets Abort. */
+       unsigned int AbortOnRead;
+       /* Channel mask: overwriting any of these channels sets Abort. */
+       unsigned int AbortOnWrite;
+       /* Number of loops entered since the writer and not yet left. */
+       unsigned int LoopDepth;
+       /* Non-zero while the scan skips an ELSE block whose IF was not seen
+        * after the writer. */
+       unsigned int InElse;
+       /* The instruction whose readers are being collected. */
+       struct rc_instruction * Writer;
+
+       /* Reader list: number of valid entries, number of reserved entries
+        * and the array itself (allocated from the compiler's memory pool). */
+       unsigned int ReaderCount;
+       unsigned int ReadersReserved;
+       struct rc_reader * Readers;
+
+       /* If this flag is enabled, rc_get_readers will exit as soon as
+        * possible after the Abort flag is set. */
+       unsigned int ExitOnAbort;
+       /* Not touched by the reader search itself.
+        * NOTE(review): presumably private data for the user callbacks --
+        * confirm against callers. */
+       void * CbData;
+};
+
+/*
+ * Collect in \p data the instructions that read the register(s) written by
+ * \p writer.  Any of the three callbacks may be NULL.  data->ExitOnAbort is
+ * read but not initialized by this function, so it must be set by the
+ * caller beforehand.
+ */
+void rc_get_readers(
+       struct radeon_compiler * c,
+       struct rc_instruction * writer,
+       struct rc_reader_data * data,
+       rc_read_src_fn read_normal_cb,
+       rc_pair_read_arg_fn read_pair_cb,
+       rc_read_write_mask_fn write_cb);
+
+/*
+ * Same as rc_get_readers(), but restricted to the temporary register
+ * written by one half (\p sub_writer) of the paired instruction \p writer.
+ */
+void rc_get_readers_sub(
+       struct radeon_compiler * c,
+       struct rc_instruction * writer,
+       struct rc_pair_sub_instruction * sub_writer,
+       struct rc_reader_data * data,
+       rc_read_src_fn read_normal_cb,
+       rc_pair_read_arg_fn read_pair_cb,
+       rc_read_write_mask_fn write_cb);
+/**
+ * Compiler passes based on dataflow analysis.
+ *
+ * NOTE(review): the definitions of these passes are not part of this
+ * header; the meaning of the \c user argument has to be checked against
+ * each implementation.
+ */
+/*@{*/
+/* Callback type that is handed an opaque \c data pointer and a \c mark_fn
+ * to call with (data, index, mask).
+ * NOTE(review): presumably used to mark which output registers are live --
+ * confirm against the deadcode pass. */
+typedef void (*rc_dataflow_mark_outputs_fn)(void * userdata, void * data,
+                       void (*mark_fn)(void * data, unsigned int index, unsigned int mask));
+void rc_dataflow_deadcode(struct radeon_compiler * c, void *user);
+void rc_dataflow_swizzles(struct radeon_compiler * c, void *user);
+/*@}*/
+
+void rc_optimize(struct radeon_compiler * c, void *user);
+
+#endif /* RADEON_DATAFLOW_H */
diff --git a/src/gallium/drivers/r300/compiler/radeon_dataflow_deadcode.c b/src/gallium/drivers/r300/compiler/radeon_dataflow_deadcode.c
new file mode 100644 (file)
index 0000000..678e147
--- /dev/null
@@ -0,0 +1,359 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_dataflow.h"
+
+#include "radeon_compiler.h"
+
+
+struct updatemask_state { /* Channel masks accumulated by mark_used(), one per register. */
+       unsigned char Output[RC_REGISTER_MAX_INDEX]; /* RC_FILE_OUTPUT, indexed by register index */
+       unsigned char Temporary[RC_REGISTER_MAX_INDEX]; /* RC_FILE_TEMPORARY, indexed by register index */
+       unsigned char Address; /* single mask returned for every RC_FILE_ADDRESS lookup */
+       unsigned char Special[RC_NUM_SPECIAL_REGISTERS]; /* RC_FILE_SPECIAL, indexed by special register index */
+};
+
+struct instruction_state { /* Result of the backward pass for one instruction; stored at index rc_instruction::IP. */
+       unsigned char WriteMask:4; /* destination channels found to be used by update_instruction() */
+       unsigned char WriteALUResult:1; /* set when the instruction's ALU result write was found to be used */
+       unsigned char SrcReg[3]; /* per source operand: channels already propagated into the used-masks */
+};
+
+struct loopinfo { /* Loop stack entry; one is pushed for every ENDLOOP met by the backward walk. */
+       struct updatemask_state * Breaks; /* copies of the used-masks stored by push_break() */
+       unsigned int BreakCount; /* number of entries stored in Breaks */
+       unsigned int BreaksReserved; /* passed to memory_pool_array_reserve(); presumably the allocated capacity */
+};
+
+struct branchinfo { /* Branch stack entry; one is pushed for every ENDIF met by the backward walk. */
+       unsigned int HaveElse:1; /* set once the ELSE belonging to this ENDIF has been processed */
+
+       struct updatemask_state StoreEndif; /* used-masks copied by push_branch() when the ENDIF is reached */
+       struct updatemask_state StoreElse; /* used-masks copied when the ELSE is reached; only written together with HaveElse */
+};
+
+struct deadcode_state { /* Working state of rc_dataflow_deadcode(). */
+       struct radeon_compiler * C; /* compiler whose program is analysed; also used for rc_error() and pool allocations */
+       struct instruction_state * Instructions; /* one entry per instruction, indexed by rc_instruction::IP */
+
+       struct updatemask_state R; /* used-masks at the current position of the backward walk */
+
+       struct branchinfo * BranchStack; /* grown by push_branch() */
+       unsigned int BranchStackSize; /* number of entries in use */
+       unsigned int BranchStackReserved; /* passed to memory_pool_array_reserve(); presumably the allocated capacity */
+
+       struct loopinfo * LoopStack; /* grown by push_loop() */
+       unsigned int LoopStackSize; /* number of entries in use */
+       unsigned int LoopStackReserved; /* passed to memory_pool_array_reserve(); presumably the allocated capacity */
+};
+
+
+/**
+ * Store the bitwise OR of the used-masks in a and b into dst.
+ *
+ * Every entry is combined on its own, so dst may be the same object as
+ * a or b (the callers in this file pass dst == a).
+ */
+static void or_updatemasks(
+       struct updatemask_state * dst,
+       struct updatemask_state * a,
+       struct updatemask_state * b)
+{
+       unsigned int reg;
+       unsigned int special;
+
+       dst->Address = a->Address | b->Address;
+
+       for(special = 0; special < RC_NUM_SPECIAL_REGISTERS; special++) {
+               dst->Special[special] = a->Special[special] | b->Special[special];
+       }
+
+       for(reg = 0; reg < RC_REGISTER_MAX_INDEX; reg++) {
+               dst->Temporary[reg] = a->Temporary[reg] | b->Temporary[reg];
+               dst->Output[reg] = a->Output[reg] | b->Output[reg];
+       }
+}
+
+/**
+ * Append a copy of the current used-masks (s->R) to the break list of the
+ * loop on top of the loop stack.
+ *
+ * The loop stack is filled when the backward walk meets an ENDLOOP. A BRK
+ * that is reached while the stack is still empty has no loop to attach to;
+ * it is reported with rc_error() instead of indexing LoopStack[-1].
+ */
+static void push_break(struct deadcode_state *s)
+{
+       struct loopinfo * loop;
+
+       if (!s->LoopStackSize) {
+               rc_error(s->C, "%s: BRK without a matching ENDLOOP\n", __FUNCTION__);
+               return;
+       }
+
+       loop = &s->LoopStack[s->LoopStackSize - 1];
+       memory_pool_array_reserve(&s->C->Pool, struct updatemask_state,
+               loop->Breaks, loop->BreakCount, loop->BreaksReserved, 1);
+
+       memcpy(&loop->Breaks[loop->BreakCount++], &s->R, sizeof(s->R));
+}
+
+/**
+ * Push a zero-filled loopinfo entry onto the loop stack after asking
+ * memory_pool_array_reserve() for room for one more element.
+ */
+static void push_loop(struct deadcode_state * s)
+{
+       struct loopinfo * fresh;
+
+       memory_pool_array_reserve(&s->C->Pool, struct loopinfo, s->LoopStack,
+                       s->LoopStackSize, s->LoopStackReserved, 1);
+
+       /* Look the slot up only after the reserve call, in case it
+        * replaced s->LoopStack. */
+       fresh = &s->LoopStack[s->LoopStackSize];
+       memset(fresh, 0, sizeof(*fresh));
+       s->LoopStackSize++;
+}
+
+/**
+ * Push a new entry onto the branch stack: no ELSE seen yet, and StoreEndif
+ * holding a copy of the current used-masks. StoreElse is left untouched.
+ */
+static void push_branch(struct deadcode_state * s)
+{
+       struct branchinfo * top;
+
+       memory_pool_array_reserve(&s->C->Pool, struct branchinfo, s->BranchStack,
+                       s->BranchStackSize, s->BranchStackReserved, 1);
+
+       top = &s->BranchStack[s->BranchStackSize];
+       s->BranchStackSize++;
+
+       top->HaveElse = 0;
+       top->StoreEndif = s->R;
+}
+
+/**
+ * Look up the used-mask byte in s->R that belongs to register (file, index).
+ *
+ * Returns 0 for register files that are not tracked. An out-of-range index
+ * for the output, temporary or special file is reported with rc_error() and
+ * also yields 0. The index is ignored for RC_FILE_ADDRESS.
+ */
+static unsigned char * get_used_ptr(struct deadcode_state *s, rc_register_file file, unsigned int index)
+{
+       switch(file) {
+       case RC_FILE_OUTPUT:
+       case RC_FILE_TEMPORARY:
+               if (index >= RC_REGISTER_MAX_INDEX) {
+                       rc_error(s->C, "%s: index %i is out of bounds for file %i\n", __FUNCTION__, index, file);
+                       return 0;
+               }
+               return (file == RC_FILE_OUTPUT) ? &s->R.Output[index] : &s->R.Temporary[index];
+
+       case RC_FILE_ADDRESS:
+               return &s->R.Address;
+
+       case RC_FILE_SPECIAL:
+               if (index >= RC_NUM_SPECIAL_REGISTERS) {
+                       rc_error(s->C, "%s: special file index %i out of bounds\n", __FUNCTION__, index);
+                       return 0;
+               }
+               return &s->R.Special[index];
+
+       default:
+               return 0;
+       }
+}
+
+/**
+ * OR mask into the used-mask of register (file, index). Registers for
+ * which get_used_ptr() returns 0 are silently skipped.
+ */
+static void mark_used(struct deadcode_state * s, rc_register_file file, unsigned int index, unsigned int mask)
+{
+       unsigned char * used = get_used_ptr(s, file, index);
+
+       if (!used)
+               return;
+
+       *used |= mask;
+}
+
+static void update_instruction(struct deadcode_state * s, struct rc_instruction * inst)
+{      /* Backward-pass step for one instruction: work out which of its
+        * destination channels are currently marked as used, record them in
+        * s->Instructions[inst->IP], and mark the source channels feeding
+        * those destination channels as used in s->R. */
+       const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+       struct instruction_state * insts = &s->Instructions[inst->IP];
+       unsigned int usedmask = 0;
+       unsigned int srcmasks[3];
+
+       if (opcode->HasDstReg) {
+               unsigned char * pused = get_used_ptr(s, inst->U.I.DstReg.File, inst->U.I.DstReg.Index);
+               if (pused) {
+                       usedmask = *pused & inst->U.I.DstReg.WriteMask; /* channels this instruction writes that are marked used */
+                       *pused &= ~usedmask; /* this write provides them, so clear them for the instructions before it */
+               }
+       }
+
+       insts->WriteMask |= usedmask;
+
+       if (inst->U.I.WriteALUResult) {
+               unsigned char * pused = get_used_ptr(s, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT);
+               if (pused && *pused) { /* the ALU result special register is marked used */
+                       if (inst->U.I.WriteALUResult == RC_ALURESULT_X)
+                               usedmask |= RC_MASK_X;
+                       else if (inst->U.I.WriteALUResult == RC_ALURESULT_W)
+                               usedmask |= RC_MASK_W;
+
+                       *pused = 0;
+                       insts->WriteALUResult = 1;
+               }
+       }
+
+       rc_compute_sources_for_writemask(inst, usedmask, srcmasks); /* presumably fills srcmasks[] with the operand channels needed to produce usedmask */
+
+       for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
+               unsigned int refmask = 0;
+               unsigned int newsrcmask = srcmasks[src] & ~insts->SrcReg[src]; /* only operand channels not handled on an earlier call */
+               insts->SrcReg[src] |= newsrcmask;
+
+               for(unsigned int chan = 0; chan < 4; ++chan) { /* translate operand channels into register channels via the swizzle */
+                       if (GET_BIT(newsrcmask, chan))
+                               refmask |= 1 << GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan);
+               }
+
+               /* get rid of spurious bits from ZERO, ONE, etc. swizzles */
+               refmask &= RC_MASK_XYZW;
+
+               if (!refmask)
+                       continue;
+
+               mark_used(s, inst->U.I.SrcReg[src].File, inst->U.I.SrcReg[src].Index, refmask);
+
+               if (inst->U.I.SrcReg[src].RelAddr) /* a relatively addressed source also reads the address register */
+                       mark_used(s, RC_FILE_ADDRESS, 0, RC_MASK_X);
+       }
+}
+
+/**
+ * Callback passed to the rc_dataflow_mark_outputs_fn in
+ * rc_dataflow_deadcode(): marks channels of an output register as used.
+ * data is the struct deadcode_state of the running pass.
+ */
+static void mark_output_use(void * data, unsigned int index, unsigned int mask)
+{
+       mark_used((struct deadcode_state *)data, RC_FILE_OUTPUT, index, mask);
+}
+
+/**
+ * Dead-code elimination based on one backward walk over the program.
+ *
+ * user is cast to rc_dataflow_mark_outputs_fn and called once with the
+ * compiler, the pass state and mark_output_use(), so that the caller can
+ * mark the output channels that have to be kept. The program is then
+ * walked from the last instruction to the first; update_instruction()
+ * records for every instruction which destination channels are needed.
+ * A final forward walk narrows the write masks, removes instructions
+ * whose results are unused and sets unused source swizzle channels to
+ * RC_SWIZZLE_UNUSED, before rc_calculate_inputs_outputs() is called.
+ *
+ * Unbalanced loop instructions are reported with rc_error(): a BGNLOOP
+ * that is reached with an empty loop stack no longer indexes
+ * LoopStack[-1], and the ENDLOOP pre-scan stops at the head of the
+ * instruction list instead of scanning around it.
+ */
+void rc_dataflow_deadcode(struct radeon_compiler * c, void *user)
+{
+       struct deadcode_state s;
+       unsigned int nr_instructions;
+       rc_dataflow_mark_outputs_fn dce = (rc_dataflow_mark_outputs_fn)user;
+       unsigned int ip;
+
+       memset(&s, 0, sizeof(s));
+       s.C = c;
+
+       nr_instructions = rc_recompute_ips(c);
+       s.Instructions = memory_pool_malloc(&c->Pool, sizeof(struct instruction_state)*nr_instructions);
+       memset(s.Instructions, 0, sizeof(struct instruction_state)*nr_instructions);
+
+       /* Seed the used-masks with the outputs the caller wants to keep. */
+       dce(c, &s, &mark_output_use);
+
+       /* Backward walk: find out which results are needed. */
+       for(struct rc_instruction * inst = c->Program.Instructions.Prev;
+           inst != &c->Program.Instructions;
+           inst = inst->Prev) {
+               const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+
+               switch(opcode->Opcode){
+               /* Mark all sources in the loop body as used before doing
+                * normal deadcode analysis.  This is probably not optimal.
+                */
+               case RC_OPCODE_ENDLOOP:
+               {
+                       int endloops = 1;
+                       struct rc_instruction *ptr;
+                       /* Stop at the list head so that an ENDLOOP without
+                        * a BGNLOOP cannot send the scan around the list. */
+                       for(ptr = inst->Prev;
+                           endloops > 0 && ptr != &c->Program.Instructions;
+                           ptr = ptr->Prev){
+                               const struct rc_opcode_info * body_opcode =
+                                       rc_get_opcode_info(ptr->U.I.Opcode);
+                               if(ptr->U.I.Opcode == RC_OPCODE_BGNLOOP){
+                                       endloops--;
+                                       continue;
+                               }
+                               if(ptr->U.I.Opcode == RC_OPCODE_ENDLOOP){
+                                       endloops++;
+                                       continue;
+                               }
+                               if(body_opcode->HasDstReg){
+                                       unsigned int src;
+                                       unsigned int srcmasks[3];
+                                       /* NOTE(review): srcmasks are passed to
+                                        * mark_used() without the swizzle
+                                        * translation that update_instruction()
+                                        * applies -- confirm this is intended. */
+                                       rc_compute_sources_for_writemask(ptr,
+                                               ptr->U.I.DstReg.WriteMask, srcmasks);
+                                       for(src = 0; src < body_opcode->NumSrcRegs; src++){
+                                               mark_used(&s,
+                                                       ptr->U.I.SrcReg[src].File,
+                                                       ptr->U.I.SrcReg[src].Index,
+                                                       srcmasks[src]);
+                                       }
+                               }
+                       }
+                       if (endloops > 0)
+                               rc_error(c, "%s: ENDLOOP without matching BGNLOOP\n", __FUNCTION__);
+                       push_loop(&s);
+                       break;
+               }
+               case RC_OPCODE_BRK:
+                       push_break(&s);
+                       break;
+               case RC_OPCODE_BGNLOOP:
+               {
+                       unsigned int i;
+                       struct loopinfo * loop;
+
+                       if (!s.LoopStackSize) {
+                               rc_error(c, "%s: BGNLOOP without matching ENDLOOP\n", __FUNCTION__);
+                               break;
+                       }
+
+                       /* Merge the used-masks stored at every BRK back in.
+                        * NOTE(review): the loop stack entry is not popped
+                        * here -- confirm this is intended for nested loops. */
+                       loop = &s.LoopStack[s.LoopStackSize-1];
+                       for(i = 0; i < loop->BreakCount; i++) {
+                               or_updatemasks(&s.R, &s.R, &loop->Breaks[i]);
+                       }
+                       break;
+               }
+               case RC_OPCODE_CONT:
+                       break;
+               case RC_OPCODE_ENDIF:
+                       push_branch(&s);
+                       break;
+               default:
+                       if (opcode->IsFlowControl && s.BranchStackSize) {
+                               struct branchinfo * branch = &s.BranchStack[s.BranchStackSize-1];
+                               if (opcode->Opcode == RC_OPCODE_IF) {
+                                       /* Above the IF, everything needed by
+                                        * either side of the branch is needed. */
+                                       or_updatemasks(&s.R,
+                                                       &s.R,
+                                                       branch->HaveElse ? &branch->StoreElse : &branch->StoreEndif);
+
+                                       s.BranchStackSize--;
+                               } else if (opcode->Opcode == RC_OPCODE_ELSE) {
+                                       if (branch->HaveElse) {
+                                               rc_error(c, "%s: Multiple ELSE for one IF/ENDIF\n", __FUNCTION__);
+                                       } else {
+                                               /* Keep the masks of the ELSE side
+                                                * and restart the IF side from
+                                                * the state saved at ENDIF. */
+                                               memcpy(&branch->StoreElse, &s.R, sizeof(s.R));
+                                               memcpy(&s.R, &branch->StoreEndif, sizeof(s.R));
+                                               branch->HaveElse = 1;
+                                       }
+                               } else {
+                                       rc_error(c, "%s: Unhandled control flow instruction %s\n", __FUNCTION__, opcode->Name);
+                               }
+                       }
+               }
+
+               update_instruction(&s, inst);
+       }
+
+       /* Forward walk: apply the results. ip follows the instruction
+        * numbering that indexes s.Instructions[]. */
+       ip = 0;
+       for(struct rc_instruction * inst = c->Program.Instructions.Next;
+           inst != &c->Program.Instructions;
+           inst = inst->Next, ++ip) {
+               const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+               int dead = 1;
+               unsigned int srcmasks[3];
+               unsigned int usemask;
+
+               if (!opcode->HasDstReg) {
+                       dead = 0;
+               } else {
+                       inst->U.I.DstReg.WriteMask = s.Instructions[ip].WriteMask;
+                       if (s.Instructions[ip].WriteMask)
+                               dead = 0;
+
+                       if (s.Instructions[ip].WriteALUResult)
+                               dead = 0;
+                       else
+                               inst->U.I.WriteALUResult = RC_ALURESULT_NONE;
+               }
+
+               if (dead) {
+                       /* Step back first so that the loop increment moves
+                        * on to the instruction after the removed one. */
+                       struct rc_instruction * todelete = inst;
+                       inst = inst->Prev;
+                       rc_remove_instruction(todelete);
+                       continue;
+               }
+
+               usemask = s.Instructions[ip].WriteMask;
+
+               if (inst->U.I.WriteALUResult == RC_ALURESULT_X)
+                       usemask |= RC_MASK_X;
+               else if (inst->U.I.WriteALUResult == RC_ALURESULT_W)
+                       usemask |= RC_MASK_W;
+
+               rc_compute_sources_for_writemask(inst, usemask, srcmasks);
+
+               for(unsigned int src = 0; src < 3; ++src) {
+                       for(unsigned int chan = 0; chan < 4; ++chan) {
+                               if (!GET_BIT(srcmasks[src], chan))
+                                       SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan, RC_SWIZZLE_UNUSED);
+                       }
+               }
+       }
+
+       rc_calculate_inputs_outputs(c);
+}
diff --git a/src/gallium/drivers/r300/compiler/radeon_dataflow_swizzles.c b/src/gallium/drivers/r300/compiler/radeon_dataflow_swizzles.c
new file mode 100644 (file)
index 0000000..133a9f7
--- /dev/null
@@ -0,0 +1,103 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_dataflow.h"
+
+#include "radeon_compiler.h"
+#include "radeon_swizzle.h"
+
+
+/**
+ * Replace source operand src of inst by a newly allocated temporary.
+ *
+ * SwizzleCaps->Split() divides the channels used by the operand into
+ * phases. For every phase a MOV is inserted in front of inst that copies
+ * the original operand into the channels split.Phase[phase] of the
+ * temporary. Afterwards inst reads the temporary with an identity
+ * swizzle on the used channels and without negate or abs modifiers.
+ */
+static void rewrite_source(struct radeon_compiler * c,
+               struct rc_instruction * inst, unsigned src)
+{
+       struct rc_swizzle_split split;
+       unsigned int tempreg = rc_find_free_temporary(c);
+       unsigned int usemask;
+
+       /* Channels of the operand that are actually read. */
+       usemask = 0;
+       for(unsigned int chan = 0; chan < 4; ++chan) {
+               if (GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan) != RC_SWIZZLE_UNUSED)
+                       usemask |= 1 << chan;
+       }
+
+       c->SwizzleCaps->Split(inst->U.I.SrcReg[src], usemask, &split);
+
+       for(unsigned int phase = 0; phase < split.NumPhases; ++phase) {
+               struct rc_instruction * mov = rc_insert_new_instruction(c, inst->Prev);
+               unsigned int masked_negate;
+
+               mov->U.I.Opcode = RC_OPCODE_MOV;
+               mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
+               mov->U.I.DstReg.Index = tempreg;
+               mov->U.I.DstReg.WriteMask = split.Phase[phase];
+               mov->U.I.SrcReg[0] = inst->U.I.SrcReg[src];
+               mov->U.I.PreSub = inst->U.I.PreSub;
+
+               /* Channels outside this phase are not read by the MOV. */
+               for(unsigned int chan = 0; chan < 4; ++chan) {
+                       if (!GET_BIT(split.Phase[phase], chan))
+                               SET_SWZ(mov->U.I.SrcReg[0].Swizzle, chan, RC_SWIZZLE_UNUSED);
+               }
+
+               /* Normalize the negate mask when the phase's channels are
+                * negated either not at all or all together; a mixed mask
+                * is kept as copied from the original operand. */
+               masked_negate = split.Phase[phase] & mov->U.I.SrcReg[0].Negate;
+               if (masked_negate == 0)
+                       mov->U.I.SrcReg[0].Negate = 0;
+               else if (masked_negate == split.Phase[phase])
+                       mov->U.I.SrcReg[0].Negate = RC_MASK_XYZW;
+
+       }
+
+       inst->U.I.SrcReg[src].File = RC_FILE_TEMPORARY;
+       inst->U.I.SrcReg[src].Index = tempreg;
+       inst->U.I.SrcReg[src].Swizzle = 0;
+       inst->U.I.SrcReg[src].Negate = RC_MASK_NONE;
+       inst->U.I.SrcReg[src].Abs = 0;
+       for(unsigned int chan = 0; chan < 4; ++chan) {
+               SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan,
+                               GET_BIT(usemask, chan) ? chan : RC_SWIZZLE_UNUSED);
+       }
+}
+
+/**
+ * Walk the whole program and call rewrite_source() for every source
+ * operand that SwizzleCaps->IsNative() rejects for its instruction's
+ * opcode. The user argument is not used.
+ */
+void rc_dataflow_swizzles(struct radeon_compiler * c, void *user)
+{
+       struct rc_instruction * cur = c->Program.Instructions.Next;
+
+       while(cur != &c->Program.Instructions) {
+               const struct rc_opcode_info * info = rc_get_opcode_info(cur->U.I.Opcode);
+               unsigned int operand = 0;
+
+               while(operand < info->NumSrcRegs) {
+                       if (!c->SwizzleCaps->IsNative(cur->U.I.Opcode, cur->U.I.SrcReg[operand]))
+                               rewrite_source(c, cur, operand);
+                       operand++;
+               }
+
+               cur = cur->Next;
+       }
+}
diff --git a/src/gallium/drivers/r300/compiler/radeon_emulate_branches.c b/src/gallium/drivers/r300/compiler/radeon_emulate_branches.c
new file mode 100644 (file)
index 0000000..7bede34
--- /dev/null
@@ -0,0 +1,342 @@
+/*
+ * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "radeon_emulate_branches.h"
+
+#include <stdio.h>
+
+#include "radeon_compiler.h"
+#include "radeon_dataflow.h"
+
+#define VERBOSE 0
+
+#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)
+
+
+struct proxy_info { /* Proxy assignment for one temporary register. */
+       unsigned int Proxied:1; /* set by scan_write() once a proxy has been allocated */
+       unsigned int Index:RC_REGISTER_INDEX_BITS; /* index of the proxy temporary; only meaningful when Proxied is set */
+};
+
+struct register_proxies { /* Proxy table for one side of a branch. */
+       struct proxy_info Temporary[RC_REGISTER_MAX_INDEX]; /* indexed by the original temporary register index */
+};
+
+struct branch_info { /* One open branch on the stack of emulate_branch_state. */
+       struct rc_instruction * If; /* the IF instruction, set by handle_if() */
+       struct rc_instruction * Else; /* the ELSE instruction, set by handle_else(); stays 0 when there is none */
+};
+
+struct emulate_branch_state { /* Working state of rc_emulate_branches(). */
+       struct radeon_compiler * C; /* compiler whose program is being rewritten */
+
+       struct branch_info * Branches; /* stack of open branches; pushed by handle_if(), popped by handle_endif() */
+       unsigned int BranchCount; /* number of open branches */
+       unsigned int BranchReserved; /* passed to memory_pool_array_reserve(); presumably the allocated capacity */
+};
+
+
+/**
+ * Open a new branch for the IF instruction inst and make the IF read its
+ * condition from a private copy.
+ */
+static void handle_if(struct emulate_branch_state * s, struct rc_instruction * inst)
+{
+       struct branch_info * top;
+       struct rc_instruction * copy;
+
+       memory_pool_array_reserve(&s->C->Pool, struct branch_info,
+                       s->Branches, s->BranchCount, s->BranchReserved, 1);
+
+       DBG("%s\n", __FUNCTION__);
+
+       top = &s->Branches[s->BranchCount];
+       s->BranchCount++;
+       memset(top, 0, sizeof(*top));
+       top->If = inst;
+
+       /* The decision register is needed again at ENDIF time and may be
+        * overwritten in both branches, so save it in a fresh temporary
+        * right in front of the IF. */
+       copy = rc_insert_new_instruction(s->C, inst->Prev);
+       copy->U.I.Opcode = RC_OPCODE_MOV;
+       copy->U.I.DstReg.File = RC_FILE_TEMPORARY;
+       copy->U.I.DstReg.Index = rc_find_free_temporary(s->C);
+       copy->U.I.DstReg.WriteMask = RC_MASK_X;
+       copy->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+
+       /* Point the IF at the saved copy, with all modifiers cleared. */
+       inst->U.I.SrcReg[0].Swizzle = 0;
+       inst->U.I.SrcReg[0].Abs = 0;
+       inst->U.I.SrcReg[0].Negate = 0;
+       inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+       inst->U.I.SrcReg[0].Index = copy->U.I.DstReg.Index;
+}
+
+/**
+ * Remember inst as the ELSE of the innermost open branch. An ELSE with
+ * no open branch is reported with rc_error().
+ */
+static void handle_else(struct emulate_branch_state * s, struct rc_instruction * inst)
+{
+       if (s->BranchCount == 0) {
+               rc_error(s->C, "Encountered ELSE outside of branches");
+               return;
+       }
+
+       DBG("%s\n", __FUNCTION__);
+
+       s->Branches[s->BranchCount - 1].Else = inst;
+}
+
+
+struct state_and_proxies { /* userdata handed to scan_write() and remap_proxy_function() */
+       struct emulate_branch_state * S; /* pass state; gives access to the compiler */
+       struct register_proxies * Proxies; /* proxy table being filled and applied */
+};
+
+/**
+ * Return the proxy table entry for register (file, index).
+ *
+ * Only temporaries are proxied; every other file yields 0. A temporary
+ * index beyond the proxy table is reported with rc_error() and also
+ * yields 0 instead of reading past the end of the array.
+ */
+static struct proxy_info * get_proxy_info(struct state_and_proxies * sap,
+                       rc_register_file file, unsigned int index)
+{
+       if (file != RC_FILE_TEMPORARY)
+               return 0;
+
+       if (index >= RC_REGISTER_MAX_INDEX) {
+               rc_error(sap->S->C, "%s: index %u is out of bounds for file %i\n", __FUNCTION__, index, file);
+               return 0;
+       }
+
+       return &sap->Proxies->Temporary[index];
+}
+
+/**
+ * Write callback for rc_for_all_writes_mask(): the first time a proxyable
+ * register is written, allocate a free temporary as its proxy. userdata
+ * is a struct state_and_proxies; inst and comp are not used.
+ */
+static void scan_write(void * userdata, struct rc_instruction * inst,
+               rc_register_file file, unsigned int index, unsigned int comp)
+{
+       struct state_and_proxies * ctx = userdata;
+       struct proxy_info * entry = get_proxy_info(ctx, file, index);
+
+       if (!entry || entry->Proxied)
+               return;
+
+       entry->Proxied = 1;
+       entry->Index = rc_find_free_temporary(ctx->S->C);
+}
+
+/**
+ * Remap callback for rc_remap_registers(): redirect a register that has
+ * a proxy to that proxy temporary. Registers without a proxy are left
+ * alone. userdata is a struct state_and_proxies; inst is not used.
+ */
+static void remap_proxy_function(void * userdata, struct rc_instruction * inst,
+               rc_register_file * pfile, unsigned int * pindex)
+{
+       struct state_and_proxies * ctx = userdata;
+       struct proxy_info * entry = get_proxy_info(ctx, *pfile, *pindex);
+
+       if (!entry || !entry->Proxied)
+               return;
+
+       *pindex = entry->Index;
+       *pfile = RC_FILE_TEMPORARY;
+}
+
+/**
+ * Redirect all writes in the instruction range [begin, end) to proxy
+ * temporary registers.
+ *
+ * Each instruction in the range is first scanned for written registers
+ * (allocating proxies in proxies) and then remapped. Afterwards one MOV
+ * per proxied register is inserted in front of begin to copy the
+ * original temporary into its proxy.
+ */
+static void allocate_and_insert_proxies(struct emulate_branch_state * s,
+               struct register_proxies * proxies,
+               struct rc_instruction * begin,
+               struct rc_instruction * end)
+{
+       struct state_and_proxies sap;
+       struct rc_instruction * cur;
+       unsigned int reg;
+
+       sap.Proxies = proxies;
+       sap.S = s;
+
+       cur = begin;
+       while(cur != end) {
+               rc_for_all_writes_mask(cur, scan_write, &sap);
+               rc_remap_registers(cur, remap_proxy_function, &sap);
+               cur = cur->Next;
+       }
+
+       for(reg = 0; reg < RC_REGISTER_MAX_INDEX; reg++) {
+               struct rc_instruction * init;
+
+               if (!proxies->Temporary[reg].Proxied)
+                       continue;
+
+               init = rc_insert_new_instruction(s->C, begin->Prev);
+               init->U.I.Opcode = RC_OPCODE_MOV;
+               init->U.I.DstReg.File = RC_FILE_TEMPORARY;
+               init->U.I.DstReg.Index = proxies->Temporary[reg].Index;
+               init->U.I.DstReg.WriteMask = RC_MASK_XYZW;
+               init->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+               init->U.I.SrcReg[0].Index = reg;
+       }
+}
+
+
+/**
+ * Insert a CMP after inst_endif that writes all four channels of
+ * register (file, index).
+ *
+ * SrcReg[0] is the condition of inst_if with Abs set and all channels
+ * negated. SrcReg[1] is the temporary holding the value from the IF side
+ * and SrcReg[2] the one from the ELSE side; a side without a proxy reads
+ * the temporary with the original index.
+ */
+static void inject_cmp(struct emulate_branch_state * s,
+               struct rc_instruction * inst_if,
+               struct rc_instruction * inst_endif,
+               rc_register_file file, unsigned int index,
+               struct proxy_info ifproxy,
+               struct proxy_info elseproxy)
+{
+       const unsigned int if_value = ifproxy.Proxied ? ifproxy.Index : index;
+       const unsigned int else_value = elseproxy.Proxied ? elseproxy.Index : index;
+       struct rc_instruction * cmp = rc_insert_new_instruction(s->C, inst_endif);
+
+       cmp->U.I.Opcode = RC_OPCODE_CMP;
+
+       cmp->U.I.DstReg.File = file;
+       cmp->U.I.DstReg.Index = index;
+       cmp->U.I.DstReg.WriteMask = RC_MASK_XYZW;
+
+       cmp->U.I.SrcReg[0] = inst_if->U.I.SrcReg[0];
+       cmp->U.I.SrcReg[0].Abs = 1;
+       cmp->U.I.SrcReg[0].Negate = RC_MASK_XYZW;
+
+       cmp->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
+       cmp->U.I.SrcReg[1].Index = if_value;
+
+       cmp->U.I.SrcReg[2].File = RC_FILE_TEMPORARY;
+       cmp->U.I.SrcReg[2].Index = else_value;
+}
+
+/**
+ * Close the innermost open branch at the ENDIF instruction inst.
+ *
+ * The writes of the IF side and, when present, the ELSE side are
+ * redirected to proxy temporaries. A CMP is then inserted after the
+ * ENDIF for every temporary proxied on either side, and the IF, ELSE
+ * and ENDIF instructions are removed. An ENDIF with no open branch is
+ * reported with rc_error().
+ */
+static void handle_endif(struct emulate_branch_state * s, struct rc_instruction * inst)
+{
+       struct branch_info * top;
+       struct register_proxies if_proxies;
+       struct register_proxies else_proxies;
+       struct rc_instruction * if_end;
+       unsigned int reg;
+
+       if (s->BranchCount == 0) {
+               rc_error(s->C, "Encountered ENDIF outside of branches");
+               return;
+       }
+
+       DBG("%s\n", __FUNCTION__);
+
+       top = &s->Branches[s->BranchCount - 1];
+
+       memset(&if_proxies, 0, sizeof(if_proxies));
+       memset(&else_proxies, 0, sizeof(else_proxies));
+
+       /* The IF side ends at the ELSE when there is one, else at the ENDIF. */
+       if_end = top->Else ? top->Else : inst;
+       allocate_and_insert_proxies(s, &if_proxies, top->If->Next, if_end);
+
+       if (top->Else)
+               allocate_and_insert_proxies(s, &else_proxies, top->Else->Next, inst);
+
+       /* Insert the CMP instructions at the end. */
+       for(reg = 0; reg < RC_REGISTER_MAX_INDEX; reg++) {
+               if (!if_proxies.Temporary[reg].Proxied && !else_proxies.Temporary[reg].Proxied)
+                       continue;
+
+               inject_cmp(s, top->If, inst, RC_FILE_TEMPORARY, reg,
+                               if_proxies.Temporary[reg], else_proxies.Temporary[reg]);
+       }
+
+       /* Remove all traces of the branch instructions */
+       rc_remove_instruction(top->If);
+       if (top->Else)
+               rc_remove_instruction(top->Else);
+       rc_remove_instruction(inst);
+
+       s->BranchCount--;
+
+       if (VERBOSE) {
+               DBG("Program after ENDIF handling:\n");
+               rc_print_program(&s->C->Program);
+       }
+}
+
+
+struct remap_output_data { /* userdata handed to remap_output_function() */
+       unsigned int Output:RC_REGISTER_INDEX_BITS; /* index of the output register to redirect */
+       unsigned int Temporary:RC_REGISTER_INDEX_BITS; /* index of the temporary that replaces it */
+};
+
+/**
+ * Remap callback for rc_remap_registers(): redirect the output register
+ * named in userdata (a struct remap_output_data) to its replacement
+ * temporary. All other registers are left alone; inst is not used.
+ */
+static void remap_output_function(void * userdata, struct rc_instruction * inst,
+               rc_register_file * pfile, unsigned int * pindex)
+{
+       const struct remap_output_data * remap = userdata;
+
+       if (*pfile != RC_FILE_OUTPUT || *pindex != remap->Output)
+               return;
+
+       *pfile = RC_FILE_TEMPORARY;
+       *pindex = remap->Temporary;
+}
+
+
+/**
+ * Output registers cannot be read from and so cannot be dealt with like
+ * temporary registers.
+ *
+ * We do the simplest thing: if an output register is written within
+ * a branch, then *all* references to this register in the program are
+ * remapped to a temporary register, and a final MOV from that temporary
+ * to the output is appended to the end of the program.
+ *
+ * Nothing is done outside of branches or for instructions that do not
+ * write an output register.
+ */
+static void fix_output_writes(struct emulate_branch_state * s, struct rc_instruction * inst)
+{
+       const struct rc_opcode_info * info;
+       struct remap_output_data remap;
+       struct rc_instruction * cur;
+       struct rc_instruction * final_mov;
+
+       if (!s->BranchCount)
+               return;
+
+       info = rc_get_opcode_info(inst->U.I.Opcode);
+       if (!info->HasDstReg)
+               return;
+
+       if (inst->U.I.DstReg.File != RC_FILE_OUTPUT)
+               return;
+
+       remap.Output = inst->U.I.DstReg.Index;
+       remap.Temporary = rc_find_free_temporary(s->C);
+
+       for(cur = s->C->Program.Instructions.Next;
+           cur != &s->C->Program.Instructions;
+           cur = cur->Next) {
+               rc_remap_registers(cur, &remap_output_function, &remap);
+       }
+
+       final_mov = rc_insert_new_instruction(s->C, s->C->Program.Instructions.Prev);
+       final_mov->U.I.Opcode = RC_OPCODE_MOV;
+       final_mov->U.I.DstReg.File = RC_FILE_OUTPUT;
+       final_mov->U.I.DstReg.Index = remap.Output;
+       final_mov->U.I.DstReg.WriteMask = RC_MASK_XYZW;
+       final_mov->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+       final_mov->U.I.SrcReg[0].Index = remap.Temporary;
+}
+
+/**
+ * Remove branch instructions; instead, execute both branches
+ * on different register sets and choose between their results
+ * using CMP instructions in place of the original ENDIF.
+ *
+ * Instructions whose Type is not RC_INSTRUCTION_NORMAL are reported
+ * with rc_error(). The user argument is not used.
+ */
+void rc_emulate_branches(struct radeon_compiler *c, void *user)
+{
+       struct emulate_branch_state s;
+       struct rc_instruction * inst;
+       struct rc_instruction * next;
+
+       memset(&s, 0, sizeof(s));
+       s.C = c;
+
+       for(inst = c->Program.Instructions.Next;
+           inst != &c->Program.Instructions;
+           inst = next) {
+               /* Fetch the successor first: the handlers may remove inst. */
+               next = inst->Next;
+
+               if (inst->Type != RC_INSTRUCTION_NORMAL) {
+                       rc_error(c, "%s: unhandled instruction type\n", __FUNCTION__);
+                       continue;
+               }
+
+               switch(inst->U.I.Opcode) {
+               case RC_OPCODE_IF:
+                       handle_if(&s, inst);
+                       break;
+               case RC_OPCODE_ELSE:
+                       handle_else(&s, inst);
+                       break;
+               case RC_OPCODE_ENDIF:
+                       handle_endif(&s, inst);
+                       break;
+               default:
+                       fix_output_writes(&s, inst);
+                       break;
+               }
+       }
+}
diff --git a/src/gallium/drivers/r300/compiler/radeon_emulate_branches.h b/src/gallium/drivers/r300/compiler/radeon_emulate_branches.h
new file mode 100644 (file)
index 0000000..818ab84
--- /dev/null
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#ifndef RADEON_EMULATE_BRANCHES_H
+#define RADEON_EMULATE_BRANCHES_H
+
+struct radeon_compiler;
+
+/**
+ * Remove branch instructions from the program of \p c; both branches are
+ * executed and CMP instructions select between their results (see
+ * radeon_emulate_branches.c).  \p user is not referenced by the
+ * implementation.
+ */
+void rc_emulate_branches(struct radeon_compiler *c, void *user);
+
+#endif /* RADEON_EMULATE_BRANCHES_H */
diff --git a/src/gallium/drivers/r300/compiler/radeon_emulate_loops.c b/src/gallium/drivers/r300/compiler/radeon_emulate_loops.c
new file mode 100644 (file)
index 0000000..205eecd
--- /dev/null
@@ -0,0 +1,522 @@
+/*
+ * Copyright 2010 Tom Stellard <tstellar@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/**
+ * \file
+ */
+
+#include "radeon_emulate_loops.h"
+
+#include "radeon_compiler.h"
+#include "radeon_dataflow.h"
+
+/* Set to 1 to have DBG() print loop-analysis diagnostics to stderr. */
+#define VERBOSE 0
+
+/* printf-style diagnostic output; prints nothing while VERBOSE is 0. */
+#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)
+
+/**
+ * State passed to update_const_value() while scanning the instructions in
+ * front of a loop for the initial value of the loop counter.
+ */
+struct const_value {
+       /* Compiler that owns the constant table used to resolve immediates. */
+       struct radeon_compiler * C;
+       /* Source operand naming the register (file, index, swizzle) to track. */
+       struct rc_src_register * Src;
+       /* Value found so far; only meaningful while HasValue is non-zero. */
+       float Value;
+       /* Non-zero once a MOV from an immediate into the register was seen. */
+       int HasValue;
+};
+
+/**
+ * State passed to get_incr_amount() while scanning a loop body for
+ * instructions that modify the loop counter.
+ */
+struct count_inst {
+       /* Compiler that owns the constant table used to resolve immediates. */
+       struct radeon_compiler * C;
+       /* Index of the temporary register holding the counter. */
+       int Index;
+       /* Swizzle with which the loop condition reads the counter. */
+       rc_swizzle Swz;
+       /* Sum of the ADD/SUB steps found so far. */
+       float Amount;
+       /* Set to 1 when a write to the counter could not be interpreted. */
+       int Unknown;
+};
+
+/**
+ * Evaluate one channel of a source operand that refers to an immediate
+ * constant.
+ *
+ * \param c    compiler owning the constant table
+ * \param src  source operand; src->Index selects the constant, and the
+ *             swizzle and negate bit of channel \p chan are applied
+ * \param chan channel of \p src to evaluate
+ * \return the immediate, negated if the negate bit of \p chan is set; 0.0f
+ *         after reporting through rc_error() when the selected swizzle value
+ *         is 4 or greater or src->Index is not below the constant count
+ */
+static float get_constant_value(struct radeon_compiler * c,
+                                               struct rc_src_register * src,
+                                               int chan)
+{
+       const int component = GET_SWZ(src->Swizzle, chan);
+       float sign;
+
+       if (component >= 4 || src->Index >= c->Program.Constants.Count) {
+               rc_error(c, "get_constant_value: Can't find a value.\n");
+               return 0.0f;
+       }
+
+       sign = GET_BIT(src->Negate, chan) ? -1.0f : 1.0f;
+       return sign *
+               c->Program.Constants.Constants[src->Index].u.Immediate[component];
+}
+
+/**
+ * Tell whether a source operand reads an immediate constant.
+ *
+ * \return 1 if \p src is in the constant file and the constant it indexes
+ *         has type RC_CONSTANT_IMMEDIATE, 0 otherwise.  The constant table
+ *         is only consulted for operands in the constant file.
+ */
+static int src_reg_is_immediate(struct rc_src_register * src,
+                                               struct radeon_compiler * c)
+{
+       if (src->File != RC_FILE_CONSTANT) {
+               return 0;
+       }
+       return c->Program.Constants.Constants[src->Index].Type ==
+                                               RC_CONSTANT_IMMEDIATE;
+}
+
+/**
+ * Compute how many iterations of a loop fit into the ALU instruction budget
+ * (c->max_alu_insts) once the loop is unrolled.
+ *
+ * \param c    compiler; its instruction IPs are recomputed by this call
+ * \param loop loop whose BeginLoop and EndLoop members are set
+ * \return the number of iterations that fit, counting the one iteration that
+ *         is already part of the program; never less than 1
+ */
+static unsigned int loop_max_possible_iterations(struct radeon_compiler *c,
+                       struct loop_info * loop)
+{
+       unsigned int total_i = rc_recompute_ips(c);
+       unsigned int loop_i = (loop->EndLoop->IP - loop->BeginLoop->IP) - 1;
+
+       /* No further copies can be added when the body is empty (this also
+        * avoids dividing by zero) or when the program already uses up or
+        * exceeds the budget (the subtraction below would wrap around and
+        * yield a huge iteration count). */
+       if (loop_i == 0 || total_i >= c->max_alu_insts) {
+               return 1;
+       }
+
+       /* +1 because the program already has one iteration of the loop. */
+       return 1 + ((c->max_alu_insts - total_i) / loop_i);
+}
+
+/**
+ * Replace a loop by \p iterations consecutive copies of its body.
+ *
+ * The BGNLOOP and ENDLOOP instructions are removed; the existing body counts
+ * as the first iteration and iterations - 1 copies are appended after it.
+ * With iterations <= 1 only the BGNLOOP/ENDLOOP pair is removed.
+ *
+ * \param c          compiler used to allocate the copied instructions
+ * \param loop       loop whose BeginLoop and EndLoop members are set
+ * \param iterations total number of body copies wanted
+ */
+static void unroll_loop(struct radeon_compiler * c, struct loop_info * loop,
+                                               unsigned int iterations)
+{
+       unsigned int i;
+       struct rc_instruction * ptr;
+       /* First and last instruction of the original body, captured before
+        * the loop markers are removed. */
+       struct rc_instruction * first = loop->BeginLoop->Next;
+       struct rc_instruction * last = loop->EndLoop->Prev;
+       struct rc_instruction * append_to = last;
+       rc_remove_instruction(loop->BeginLoop);
+       rc_remove_instruction(loop->EndLoop);
+       for( i = 1; i < iterations; i++){
+               /* last->Next is deliberately re-read on every step:
+                * presumably rc_insert_instruction() links the new
+                * instruction directly after its first argument, so once the
+                * first copy has been inserted after `last`, last->Next is
+                * that copy and the walk stops at the end of the original
+                * body -- TODO confirm against rc_insert_instruction(). */
+               for(ptr = first; ptr != last->Next; ptr = ptr->Next){
+                       struct rc_instruction *new = rc_alloc_instruction(c);
+                       /* The whole instruction is copied, including its list
+                        * links; the insert below is expected to set those. */
+                       memcpy(new, ptr, sizeof(struct rc_instruction));
+                       rc_insert_instruction(append_to, new);
+                       append_to = new;
+               }
+       }
+}
+
+
+/**
+ * rc_for_all_writes_mask() callback that tracks the constant value of the
+ * register channel described by value->Src.
+ *
+ * A MOV from an immediate that writes the tracked channel records the value.
+ * Any other write to that channel invalidates a previously recorded value,
+ * so that only the last write before the loop decides the result.
+ *
+ * \param data  struct const_value describing the tracked register
+ * \param inst  instruction performing the write
+ * \param file  register file being written
+ * \param index register index being written
+ * \param mask  channels being written
+ */
+static void update_const_value(void * data, struct rc_instruction * inst,
+               rc_register_file file, unsigned int index, unsigned int mask)
+{
+       struct const_value * value = data;
+       /* Channel of the tracked register that the counter lives in. */
+       unsigned int chan = GET_SWZ(value->Src->Swizzle, 0);
+
+       if(value->Src->File != file ||
+          value->Src->Index != index ||
+          !((1 << chan) & mask)){
+               return;
+       }
+
+       /* The tracked channel is overwritten by something that is not a
+        * known constant: forget any value recorded by an earlier MOV. */
+       if(inst->U.I.Opcode != RC_OPCODE_MOV ||
+          !src_reg_is_immediate(&inst->U.I.SrcReg[0], value->C)){
+               value->HasValue = 0;
+               return;
+       }
+
+       /* MOV works per channel, so the tracked channel receives the source
+        * channel with the same number, not necessarily channel 0. */
+       value->HasValue = 1;
+       value->Value =
+               get_constant_value(value->C, &inst->U.I.SrcReg[0], chan);
+}
+
+/**
+ * rc_for_all_writes_mask() callback that accumulates by how much one loop
+ * iteration changes the counter described by \p data (a struct count_inst).
+ *
+ * Understood are "ADD dst, counter, imm", "ADD dst, imm, counter" and
+ * "SUB dst, counter, imm" that write exactly the counter channel.  Every
+ * other write that touches the counter channel sets count_inst->Unknown so
+ * that the caller gives up on unrolling.
+ *
+ * \param data  struct count_inst describing the counter
+ * \param inst  instruction performing the write
+ * \param file  register file being written
+ * \param index register index being written
+ * \param mask  channels being written
+ */
+static void get_incr_amount(void * data, struct rc_instruction * inst,
+               rc_register_file file, unsigned int index, unsigned int mask)
+{
+       struct count_inst * count_inst = data;
+       unsigned int counter_mask = 1 << GET_SWZ(count_inst->Swz, 0);
+       int amnt_src_index;
+       const struct rc_opcode_info * opcode;
+       float amount;
+
+       /* Writes that do not touch the counter channel are irrelevant. */
+       if(file != RC_FILE_TEMPORARY ||
+          count_inst->Index != index ||
+          !(counter_mask & mask)){
+               return;
+       }
+       /* A write that covers the counter together with other channels still
+        * changes the counter, but not in a form analysed here. */
+       if(mask != counter_mask){
+               count_inst->Unknown = 1;
+               return;
+       }
+       /* Find the index of the counter register. */
+       opcode = rc_get_opcode_info(inst->U.I.Opcode);
+       if(opcode->NumSrcRegs != 2){
+               count_inst->Unknown = 1;
+               return;
+       }
+       if(inst->U.I.SrcReg[0].File == RC_FILE_TEMPORARY &&
+          inst->U.I.SrcReg[0].Index == count_inst->Index &&
+          inst->U.I.SrcReg[0].Swizzle == count_inst->Swz){
+               amnt_src_index = 1;
+       } else if( inst->U.I.SrcReg[1].File == RC_FILE_TEMPORARY &&
+                  inst->U.I.SrcReg[1].Index == count_inst->Index &&
+                  inst->U.I.SrcReg[1].Swizzle == count_inst->Swz){
+               amnt_src_index = 0;
+       }
+       else{
+               count_inst->Unknown = 1;
+               return;
+       }
+       /* The other operand must be an immediate to know the step size. */
+       if(src_reg_is_immediate(&inst->U.I.SrcReg[amnt_src_index],
+                                                       count_inst->C)){
+               amount = get_constant_value(count_inst->C,
+                               &inst->U.I.SrcReg[amnt_src_index], 0);
+       }
+       else{
+               count_inst->Unknown = 1 ;
+               return;
+       }
+       switch(inst->U.I.Opcode){
+       case RC_OPCODE_ADD:
+               count_inst->Amount += amount;
+               break;
+       case RC_OPCODE_SUB:
+               /* "SUB dst, imm, counter" computes imm - counter, which is
+                * not a constant step. */
+               if(amnt_src_index == 0){
+                       count_inst->Unknown = 1;
+                       return;
+               }
+               count_inst->Amount -= amount;
+               break;
+       default:
+               count_inst->Unknown = 1;
+               return;
+       }
+}
+
+/**
+ * Try to replace a counted loop by straight-line copies of its body.
+ *
+ * A loop qualifies when its exit test (loop->Cond) compares a register with
+ * an immediate limit, the register receives a known immediate before the
+ * loop, and inside the loop it is only changed by ADD/SUB with immediates.
+ * On success the Cond/IF/BRK/ENDIF instructions and the BGNLOOP/ENDLOOP pair
+ * are removed, the body is replicated and loop->EndLoop is set to NULL.
+ *
+ * The instruction budget is only enforced when c->max_alu_insts > 0;
+ * otherwise every eligible loop is unrolled regardless of how many
+ * iterations it has.
+ *
+ * \param c    compiler owning the program
+ * \param loop loop description filled in by build_loop_info()
+ * \return 1 if the loop was unrolled, 0 if the program was left unchanged
+ */
+static int try_unroll_loop(struct radeon_compiler * c, struct loop_info * loop)
+{
+       int end_loops;
+       int iterations;
+       struct count_inst count_inst;
+       float limit_value;
+       struct rc_src_register * counter;
+       struct rc_src_register * limit;
+       struct const_value counter_value;
+       struct rc_instruction * inst;
+
+       /* Find the counter and the upper limit */
+
+       if(src_reg_is_immediate(&loop->Cond->U.I.SrcReg[0], c)){
+               limit = &loop->Cond->U.I.SrcReg[0];
+               counter = &loop->Cond->U.I.SrcReg[1];
+       }
+       else if(src_reg_is_immediate(&loop->Cond->U.I.SrcReg[1], c)){
+               limit = &loop->Cond->U.I.SrcReg[1];
+               counter = &loop->Cond->U.I.SrcReg[0];
+       }
+       else{
+               DBG("No constant limit.\n");
+               return 0;
+       }
+
+       /* Find the initial value of the counter */
+       counter_value.Src = counter;
+       counter_value.Value = 0.0f;
+       counter_value.HasValue = 0;
+       counter_value.C = c;
+       for(inst = c->Program.Instructions.Next; inst != loop->BeginLoop;
+                                                       inst = inst->Next){
+               rc_for_all_writes_mask(inst, update_const_value, &counter_value);
+       }
+       if(!counter_value.HasValue){
+               DBG("Initial counter value cannot be determined.\n");
+               return 0;
+       }
+       DBG("Initial counter value is %f\n", counter_value.Value);
+       /* Determine how the counter is modified each loop */
+       count_inst.C = c;
+       count_inst.Index = counter->Index;
+       count_inst.Swz = counter->Swizzle;
+       count_inst.Amount = 0.0f;
+       count_inst.Unknown = 0;
+       end_loops = 1;
+       for(inst = loop->BeginLoop->Next; end_loops > 0; inst = inst->Next){
+               switch(inst->U.I.Opcode){
+               /* XXX In the future we might want to try to unroll nested
+                * loops here.*/
+               case RC_OPCODE_BGNLOOP:
+                       end_loops++;
+                       break;
+               case RC_OPCODE_ENDLOOP:
+                       loop->EndLoop = inst;
+                       end_loops--;
+                       break;
+               case RC_OPCODE_BRK:
+                       /* Don't unroll the loop if it has a BRK instruction
+                        * other than the one used when testing the main
+                        * conditional of the loop. */
+
+                       /* Make sure we haven't entered a nested loop. */
+                       if(inst != loop->Brk && end_loops == 1) {
+                               return 0;
+                       }
+                       break;
+               /* XXX Check if the counter is modified within an if statement.
+                */
+               case RC_OPCODE_IF:
+                       break;
+               default:
+                       rc_for_all_writes_mask(inst, get_incr_amount, &count_inst);
+                       if(count_inst.Unknown){
+                               return 0;
+                       }
+                       break;
+               }
+       }
+       /* Infinite loop */
+       if(count_inst.Amount == 0.0f){
+               return 0;
+       }
+       DBG("Counter is increased by %f each iteration.\n", count_inst.Amount);
+       /* Calculate the number of iterations of this loop.  Keeping this
+        * simple, since we only support increment and decrement loops.
+        */
+       limit_value = get_constant_value(c, limit, 0);
+       DBG("Limit is %f.\n", limit_value);
+       /* The iteration calculations are opposite of what you would expect.
+        * In a normal loop, if the condition is met, then the loop continues,
+        * but with our loops, if the condition is met, the loop is exited. */
+       switch(loop->Cond->U.I.Opcode){
+       case RC_OPCODE_SGE:
+       case RC_OPCODE_SLE:
+               iterations = (int) ceilf((limit_value - counter_value.Value) /
+                                                       count_inst.Amount);
+               break;
+
+       case RC_OPCODE_SGT:
+       case RC_OPCODE_SLT:
+               iterations = (int) floorf((limit_value - counter_value.Value) /
+                                                       count_inst.Amount) + 1;
+               break;
+       default:
+               return 0;
+       }
+
+       /* Unrolling always leaves at least one copy of the body behind, so a
+        * loop whose exit condition already holds on entry (zero iterations)
+        * must not be unrolled.  A negative count means the counter moves
+        * away from the limit and would turn into a huge unsigned count. */
+       if (iterations < 1) {
+               return 0;
+       }
+
+       if (c->max_alu_insts > 0
+               && iterations > loop_max_possible_iterations(c, loop)) {
+               return 0;
+       }
+
+       DBG("Loop will have %d iterations.\n", iterations);
+
+       /* Prepare loop for unrolling */
+       rc_remove_instruction(loop->Cond);
+       rc_remove_instruction(loop->If);
+       rc_remove_instruction(loop->Brk);
+       rc_remove_instruction(loop->EndIf);
+
+       unroll_loop(c, loop, iterations);
+       /* A NULL EndLoop marks the loop as done for rc_emulate_loops(). */
+       loop->EndLoop = NULL;
+       return 1;
+}
+
+/**
+ * Locate the instructions that make up the loop starting at \p inst.
+ *
+ * Walks forward from the BGNLOOP, stepping over nested loops, until the
+ * matching ENDLOOP.  The first BRK found directly between an IF and an ENDIF
+ * is taken as the loop's exit test; the instruction in front of that IF has
+ * to be one of SLT/SGE/SGT/SLE/SEQ/SNE.
+ *
+ * @param c The compiler; used for error reporting and as the owner of the
+ *          instruction list.
+ * @param loop Cleared and then filled in by this function.
+ * @param inst A pointer to a BGNLOOP instruction.
+ * @return 1 if all of the members of loop were set.
+ * @return 0 if there was an error and some members of loop are still NULL.
+ */
+static int build_loop_info(struct radeon_compiler * c, struct loop_info * loop,
+                                               struct rc_instruction * inst)
+{
+       struct rc_instruction * const list_end = &c->Program.Instructions;
+       struct rc_instruction * cur;
+
+       if (inst->U.I.Opcode != RC_OPCODE_BGNLOOP) {
+               rc_error(c, "%s: expected BGNLOOP", __FUNCTION__);
+               return 0;
+       }
+
+       memset(loop, 0, sizeof(*loop));
+       loop->BeginLoop = inst;
+
+       cur = inst->Next;
+       while (!loop->EndLoop) {
+               if (cur == list_end) {
+                       rc_error(c, "%s: BGNLOOP without an ENDLOOOP.\n",
+                                                               __FUNCTION__);
+                       return 0;
+               }
+
+               if (cur->U.I.Opcode == RC_OPCODE_BGNLOOP) {
+                       /* Nested loop: advance to its matching ENDLOOP. */
+                       unsigned int depth = 1;
+
+                       cur = cur->Next;
+                       while (cur != list_end) {
+                               if (cur->U.I.Opcode == RC_OPCODE_BGNLOOP) {
+                                       depth++;
+                               } else if (cur->U.I.Opcode == RC_OPCODE_ENDLOOP
+                                                       && --depth == 0) {
+                                       break;
+                               }
+                               cur = cur->Next;
+                       }
+                       if (cur == list_end) {
+                               rc_error(c, "%s: BGNLOOP without an ENDLOOOP\n",
+                                                               __FUNCTION__);
+                               return 0;
+                       }
+               } else if (cur->U.I.Opcode == RC_OPCODE_BRK) {
+                       /* Only the first "IF; BRK; ENDIF" sequence counts. */
+                       if (!loop->Brk
+                           && cur->Prev->U.I.Opcode == RC_OPCODE_IF
+                           && cur->Next->U.I.Opcode == RC_OPCODE_ENDIF) {
+                               struct rc_instruction * cond = cur->Prev->Prev;
+
+                               loop->Brk = cur;
+                               loop->If = cur->Prev;
+                               loop->EndIf = cur->Next;
+
+                               switch (cond->U.I.Opcode) {
+                               case RC_OPCODE_SLT:
+                               case RC_OPCODE_SGE:
+                               case RC_OPCODE_SGT:
+                               case RC_OPCODE_SLE:
+                               case RC_OPCODE_SEQ:
+                               case RC_OPCODE_SNE:
+                                       loop->Cond = cond;
+                                       break;
+                               default:
+                                       return 0;
+                               }
+                       }
+               } else if (cur->U.I.Opcode == RC_OPCODE_ENDLOOP) {
+                       loop->EndLoop = cur;
+               }
+
+               cur = cur->Next;
+       }
+
+       return loop->BeginLoop && loop->Brk && loop->If && loop->EndIf
+                                       && loop->Cond && loop->EndLoop;
+}
+
+/**
+ * Record a loop in the emulation state and either unroll it right away or
+ * rewrite it so that it can be emulated later.  The rewrite inverts the exit
+ * test and wraps the loop body in the IF:
+ *
+ * BGNLOOP;                            -> BGNLOOP;
+ * <Additional conditional code>       -> <Additional conditional code>
+ * SGE/SLT temp[0], temp[1], temp[2];  -> SLT/SGE temp[0], temp[1], temp[2];
+ * IF temp[0];                         -> IF temp[0];
+ * BRK;                                ->
+ * ENDIF;                              -> <Loop Body>
+ * <Loop Body>                         -> ENDIF;
+ * ENDLOOP;                            -> ENDLOOP
+ *
+ * @param s Loop emulation state; a new loop_info entry is appended to it.
+ * @param inst A pointer to a BGNLOOP instruction.
+ * @return 1 for success, 0 for failure
+ */
+static int transform_loop(struct emulate_loop_state * s,
+                                               struct rc_instruction * inst)
+{
+       struct radeon_compiler * c = s->C;
+       struct loop_info * info;
+       struct rc_instruction * cond;
+
+       memory_pool_array_reserve(&c->Pool, struct loop_info,
+                       s->Loops, s->LoopCount, s->LoopReserved, 1);
+
+       info = &s->Loops[s->LoopCount];
+       s->LoopCount++;
+
+       if (!build_loop_info(c, info, inst)) {
+               rc_error(c, "Failed to build loop info\n");
+               return 0;
+       }
+
+       /* A fully unrolled loop needs no further preparation. */
+       if (try_unroll_loop(c, info)) {
+               return 1;
+       }
+
+       /* Invert the exit test so that the IF guards the loop body. */
+       cond = info->Cond;
+       switch (cond->U.I.Opcode) {
+       case RC_OPCODE_SGE: cond->U.I.Opcode = RC_OPCODE_SLT; break;
+       case RC_OPCODE_SLT: cond->U.I.Opcode = RC_OPCODE_SGE; break;
+       case RC_OPCODE_SLE: cond->U.I.Opcode = RC_OPCODE_SGT; break;
+       case RC_OPCODE_SGT: cond->U.I.Opcode = RC_OPCODE_SLE; break;
+       case RC_OPCODE_SEQ: cond->U.I.Opcode = RC_OPCODE_SNE; break;
+       case RC_OPCODE_SNE: cond->U.I.Opcode = RC_OPCODE_SEQ; break;
+       default:
+               rc_error(c, "loop->Cond is not a conditional.\n");
+               return 0;
+       }
+
+       /* Drop the BRK and move the ENDIF to just in front of the ENDLOOP. */
+       rc_remove_instruction(info->Brk);
+       rc_remove_instruction(info->EndIf);
+       rc_insert_instruction(info->EndLoop->Prev, info->EndIf);
+       return 1;
+}
+
+/**
+ * Reset c->loop_state and run transform_loop() on every BGNLOOP of the
+ * program.  Processing stops at the first loop that cannot be transformed.
+ *
+ * \param c    compiler whose program is processed
+ * \param user not referenced by this function
+ */
+void rc_transform_loops(struct radeon_compiler *c, void *user)
+{
+       struct emulate_loop_state * state = &c->loop_state;
+       struct rc_instruction * inst;
+
+       memset(state, 0, sizeof(*state));
+       state->C = c;
+
+       inst = c->Program.Instructions.Next;
+       while (inst != &c->Program.Instructions) {
+               if (inst->Type == RC_INSTRUCTION_NORMAL &&
+                   inst->U.I.Opcode == RC_OPCODE_BGNLOOP &&
+                   !transform_loop(state, inst)) {
+                       return;
+               }
+               /* Read the successor only after the loop was handled. */
+               inst = inst->Next;
+       }
+}
+
+/**
+ * Unroll every loop of the program for which build_loop_info() succeeds and
+ * try_unroll_loop() can determine the iteration count; other loops are left
+ * as they are.
+ *
+ * \param c    compiler whose program is processed
+ * \param user not referenced by this function
+ */
+void rc_unroll_loops(struct radeon_compiler *c, void *user)
+{
+       struct rc_instruction * cur;
+       struct loop_info info;
+
+       for (cur = c->Program.Instructions.Next;
+                       cur != &c->Program.Instructions; cur = cur->Next) {
+               if (cur->U.I.Opcode != RC_OPCODE_BGNLOOP) {
+                       continue;
+               }
+               if (!build_loop_info(c, &info, cur)) {
+                       continue;
+               }
+               try_unroll_loop(c, &info);
+       }
+}
+
+/**
+ * Unroll every loop recorded in c->loop_state that has not been unrolled
+ * yet, using as many iterations as loop_max_possible_iterations() allows.
+ *
+ * \param c    compiler whose loop_state was filled by rc_transform_loops()
+ * \param user not referenced by this function
+ */
+void rc_emulate_loops(struct radeon_compiler *c, void *user)
+{
+       struct emulate_loop_state * state = &c->loop_state;
+       unsigned int remaining = state->LoopCount;
+
+       /* Walk the list of loops backwards so that nested loops are unrolled
+        * before the loops that contain them. */
+       while (remaining-- > 0) {
+               struct loop_info * info = &state->Loops[remaining];
+               unsigned int iterations;
+
+               /* A NULL EndLoop marks a loop that is already unrolled. */
+               if (info->EndLoop) {
+                       iterations = loop_max_possible_iterations(state->C, info);
+                       unroll_loop(state->C, info, iterations);
+               }
+       }
+}
diff --git a/src/gallium/drivers/r300/compiler/radeon_emulate_loops.h b/src/gallium/drivers/r300/compiler/radeon_emulate_loops.h
new file mode 100644 (file)
index 0000000..cd800c0
--- /dev/null
@@ -0,0 +1,32 @@
+
+
+#ifndef RADEON_EMULATE_LOOPS_H
+#define RADEON_EMULATE_LOOPS_H
+
+/* NOTE(review): not referenced by the loop code in radeon_emulate_loops.c;
+ * presumably an iteration cap used elsewhere or a leftover -- confirm. */
+#define MAX_ITERATIONS 8
+
+struct radeon_compiler;
+
+/**
+ * The instructions that delimit one loop and its exit test, as collected by
+ * build_loop_info():
+ *
+ *   BeginLoop ... Cond; If; Brk; EndIf; ... EndLoop
+ */
+struct loop_info {
+       /* The BGNLOOP instruction. */
+       struct rc_instruction * BeginLoop;
+       /* Comparison (SLT/SGE/SGT/SLE/SEQ/SNE) directly in front of If. */
+       struct rc_instruction * Cond;
+       /* The IF directly in front of Brk. */
+       struct rc_instruction * If;
+       /* The BRK that leaves the loop. */
+       struct rc_instruction * Brk;
+       /* The ENDIF directly after Brk. */
+       struct rc_instruction * EndIf;
+       /* The matching ENDLOOP; set to NULL once the loop has been unrolled. */
+       struct rc_instruction * EndLoop;
+};
+
+/**
+ * Loops collected by rc_transform_loops() for later use by
+ * rc_emulate_loops().
+ */
+struct emulate_loop_state {
+       /* Compiler whose program the loops belong to. */
+       struct radeon_compiler * C;
+       /* Array of loops, in the order in which their BGNLOOP was visited. */
+       struct loop_info * Loops;
+       /* Number of entries used in Loops. */
+       unsigned int LoopCount;
+       /* Capacity bookkeeping passed to memory_pool_array_reserve(). */
+       unsigned int LoopReserved;
+};
+
+/* Resets c->loop_state, then unrolls or prepares for emulation every
+ * BGNLOOP of the program.  `user` is not referenced. */
+void rc_transform_loops(struct radeon_compiler *c, void *user);
+
+/* Unrolls the loops whose iteration count can be determined and leaves the
+ * other loops as they are.  `user` is not referenced. */
+void rc_unroll_loops(struct radeon_compiler * c, void *user);
+
+/* Unrolls the loops recorded in c->loop_state that are not unrolled yet, as
+ * often as the instruction budget allows.  `user` is not referenced. */
+void rc_emulate_loops(struct radeon_compiler * c, void *user);
+
+#endif /* RADEON_EMULATE_LOOPS_H */
diff --git a/src/gallium/drivers/r300/compiler/radeon_list.c b/src/gallium/drivers/r300/compiler/radeon_list.c
new file mode 100644 (file)
index 0000000..811c908
--- /dev/null
@@ -0,0 +1,90 @@
+/*
+ * Copyright 2011 Tom Stellard <tstellar@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_list.h"
+
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "memory_pool.h"
+
+/**
+ * Allocate an unlinked list node from \p pool.
+ *
+ * \param pool memory pool the node is allocated from
+ * \param item payload pointer stored in the node
+ * \return the new node, with Prev and Next set to NULL
+ */
+struct rc_list * rc_list(struct memory_pool * pool, void * item)
+{
+       struct rc_list * node = memory_pool_malloc(pool, sizeof(*node));
+
+       node->Prev = NULL;
+       node->Next = NULL;
+       node->Item = item;
+       return node;
+}
+
+/**
+ * Append \p new_value to the end of the list.
+ *
+ * \param list      address of the head pointer; an empty list (*list ==
+ *                  NULL) gets \p new_value as its head
+ * \param new_value node to append
+ */
+void rc_list_add(struct rc_list ** list, struct rc_list * new_value)
+{
+       struct rc_list * tail = *list;
+
+       if (!tail) {
+               *list = new_value;
+               return;
+       }
+
+       /* Walk to the last node and link the new one behind it. */
+       while (tail->Next) {
+               tail = tail->Next;
+       }
+       tail->Next = new_value;
+       new_value->Prev = tail;
+}
+
+/**
+ * Unlink \p rm_value from the list.
+ *
+ * The links stored in \p rm_value itself are left untouched, so a caller
+ * that is walking the list can still follow rm_value->Next afterwards.
+ *
+ * \param list     address of the head pointer; updated when the head is
+ *                 removed
+ * \param rm_value node to remove; must be a member of *list
+ */
+void rc_list_remove(struct rc_list ** list, struct rc_list * rm_value)
+{
+       if (*list == rm_value) {
+               *list = rm_value->Next;
+               /* The new head must not keep pointing back at the node that
+                * was just removed. */
+               if (*list) {
+                       (*list)->Prev = NULL;
+               }
+               return;
+       }
+
+       rm_value->Prev->Next = rm_value->Next;
+       if (rm_value->Next) {
+               rm_value->Next->Prev = rm_value->Prev;
+       }
+}
+
+/**
+ * Count the nodes reachable from \p list by following Next.
+ *
+ * \return the number of nodes; 0 for a NULL list
+ */
+unsigned int rc_list_count(struct rc_list * list)
+{
+       unsigned int n;
+
+       for (n = 0; list; list = list->Next) {
+               n++;
+       }
+       return n;
+}
+
+/**
+ * Print the Item pointer of every node, each followed by "->", and a
+ * final newline to stderr.
+ */
+void rc_list_print(struct rc_list * list)
+{
+       struct rc_list * node;
+
+       for (node = list; node; node = node->Next) {
+               fprintf(stderr, "%p->", node->Item);
+       }
+       fprintf(stderr, "\n");
+}
diff --git a/src/gallium/drivers/r300/compiler/radeon_list.h b/src/gallium/drivers/r300/compiler/radeon_list.h
new file mode 100644 (file)
index 0000000..b3c8f89
--- /dev/null
@@ -0,0 +1,46 @@
+/*
+ * Copyright 2011 Tom Stellard <tstellar@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef RADEON_LIST_H
+#define RADEON_LIST_H
+
+struct memory_pool;
+
+/**
+ * Node of a doubly linked list.  A list is referred to by a pointer to its
+ * first node; an empty list is a NULL pointer.
+ */
+struct rc_list {
+       /* Payload pointer stored by rc_list(); not interpreted by the list. */
+       void * Item;
+       /* Previous node; NULL for a node created by rc_list(). */
+       struct rc_list * Prev;
+       /* Next node, or NULL at the end of the list. */
+       struct rc_list * Next;
+};
+
+/* Allocates an unlinked node holding `item` from `pool`. */
+struct rc_list * rc_list(struct memory_pool * pool, void * item);
+/* Appends `new_value` to the end of *list; it becomes the head of an empty
+ * list. */
+void rc_list_add(struct rc_list ** list, struct rc_list * new_value);
+/* Unlinks `rm_value` from *list; *list advances to the next node when the
+ * head is removed. */
+void rc_list_remove(struct rc_list ** list, struct rc_list * rm_value);
+/* Returns the number of nodes reachable from `list` through Next. */
+unsigned int rc_list_count(struct rc_list * list);
+/* Prints the Item pointer of every node to stderr. */
+void rc_list_print(struct rc_list * list);
+
+#endif /* RADEON_LIST_H */
+
diff --git a/src/gallium/drivers/r300/compiler/radeon_opcodes.c b/src/gallium/drivers/r300/compiler/radeon_opcodes.c
new file mode 100644 (file)
index 0000000..afd78ad
--- /dev/null
@@ -0,0 +1,546 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_opcodes.h"
+#include "radeon_program.h"
+
+#include "radeon_program_constants.h"
+
+/**
+ * Static description of every opcode, indexed by its rc_opcode value.
+ *
+ * Every entry is placed with an explicit array designator, so its slot is
+ * tied to the enum constant instead of to its position in this initializer
+ * (rc_get_opcode_info() asserts that rc_opcodes[op].Opcode == op).
+ * Fields that are not mentioned in an entry are zero-initialized.
+ */
+struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = {
+       [RC_OPCODE_NOP] = { .Opcode = RC_OPCODE_NOP, .Name = "NOP" },
+       [RC_OPCODE_ILLEGAL_OPCODE] = { .Opcode = RC_OPCODE_ILLEGAL_OPCODE,
+               .Name = "ILLEGAL OPCODE" },
+
+       /* ALU and special instructions */
+       [RC_OPCODE_ABS] = { .Opcode = RC_OPCODE_ABS, .Name = "ABS",
+               .NumSrcRegs = 1, .HasDstReg = 1, .IsComponentwise = 1 },
+       [RC_OPCODE_ADD] = { .Opcode = RC_OPCODE_ADD, .Name = "ADD",
+               .NumSrcRegs = 2, .HasDstReg = 1, .IsComponentwise = 1 },
+       [RC_OPCODE_ARL] = { .Opcode = RC_OPCODE_ARL, .Name = "ARL",
+               .NumSrcRegs = 1, .HasDstReg = 1 },
+       [RC_OPCODE_CEIL] = { .Opcode = RC_OPCODE_CEIL, .Name = "CEIL",
+               .NumSrcRegs = 1, .HasDstReg = 1, .IsComponentwise = 1 },
+       [RC_OPCODE_CLAMP] = { .Opcode = RC_OPCODE_CLAMP, .Name = "CLAMP",
+               .NumSrcRegs = 3, .HasDstReg = 1, .IsComponentwise = 1 },
+       [RC_OPCODE_CMP] = { .Opcode = RC_OPCODE_CMP, .Name = "CMP",
+               .NumSrcRegs = 3, .HasDstReg = 1, .IsComponentwise = 1 },
+       [RC_OPCODE_CND] = { .Opcode = RC_OPCODE_CND, .Name = "CND",
+               .NumSrcRegs = 3, .HasDstReg = 1, .IsComponentwise = 1 },
+       [RC_OPCODE_COS] = { .Opcode = RC_OPCODE_COS, .Name = "COS",
+               .NumSrcRegs = 1, .HasDstReg = 1, .IsStandardScalar = 1 },
+       [RC_OPCODE_DDX] = { .Opcode = RC_OPCODE_DDX, .Name = "DDX",
+               .NumSrcRegs = 2, .HasDstReg = 1, .IsComponentwise = 1 },
+       [RC_OPCODE_DDY] = { .Opcode = RC_OPCODE_DDY, .Name = "DDY",
+               .NumSrcRegs = 2, .HasDstReg = 1, .IsComponentwise = 1 },
+       [RC_OPCODE_DP2] = { .Opcode = RC_OPCODE_DP2, .Name = "DP2",
+               .NumSrcRegs = 2, .HasDstReg = 1 },
+       [RC_OPCODE_DP3] = { .Opcode = RC_OPCODE_DP3, .Name = "DP3",
+               .NumSrcRegs = 2, .HasDstReg = 1 },
+       [RC_OPCODE_DP4] = { .Opcode = RC_OPCODE_DP4, .Name = "DP4",
+               .NumSrcRegs = 2, .HasDstReg = 1 },
+       [RC_OPCODE_DPH] = { .Opcode = RC_OPCODE_DPH, .Name = "DPH",
+               .NumSrcRegs = 2, .HasDstReg = 1 },
+       [RC_OPCODE_DST] = { .Opcode = RC_OPCODE_DST, .Name = "DST",
+               .NumSrcRegs = 2, .HasDstReg = 1 },
+       [RC_OPCODE_EX2] = { .Opcode = RC_OPCODE_EX2, .Name = "EX2",
+               .NumSrcRegs = 1, .HasDstReg = 1, .IsStandardScalar = 1 },
+       [RC_OPCODE_EXP] = { .Opcode = RC_OPCODE_EXP, .Name = "EXP",
+               .NumSrcRegs = 1, .HasDstReg = 1 },
+       [RC_OPCODE_FLR] = { .Opcode = RC_OPCODE_FLR, .Name = "FLR",
+               .NumSrcRegs = 1, .HasDstReg = 1, .IsComponentwise = 1 },
+       [RC_OPCODE_FRC] = { .Opcode = RC_OPCODE_FRC, .Name = "FRC",
+               .NumSrcRegs = 1, .HasDstReg = 1, .IsComponentwise = 1 },
+       [RC_OPCODE_KIL] = { .Opcode = RC_OPCODE_KIL, .Name = "KIL",
+               .NumSrcRegs = 1 },
+       [RC_OPCODE_LG2] = { .Opcode = RC_OPCODE_LG2, .Name = "LG2",
+               .NumSrcRegs = 1, .HasDstReg = 1, .IsStandardScalar = 1 },
+       [RC_OPCODE_LIT] = { .Opcode = RC_OPCODE_LIT, .Name = "LIT",
+               .NumSrcRegs = 1, .HasDstReg = 1 },
+       [RC_OPCODE_LOG] = { .Opcode = RC_OPCODE_LOG, .Name = "LOG",
+               .NumSrcRegs = 1, .HasDstReg = 1 },
+       [RC_OPCODE_LRP] = { .Opcode = RC_OPCODE_LRP, .Name = "LRP",
+               .NumSrcRegs = 3, .HasDstReg = 1, .IsComponentwise = 1 },
+       [RC_OPCODE_MAD] = { .Opcode = RC_OPCODE_MAD, .Name = "MAD",
+               .NumSrcRegs = 3, .HasDstReg = 1, .IsComponentwise = 1 },
+       [RC_OPCODE_MAX] = { .Opcode = RC_OPCODE_MAX, .Name = "MAX",
+               .NumSrcRegs = 2, .HasDstReg = 1, .IsComponentwise = 1 },
+       [RC_OPCODE_MIN] = { .Opcode = RC_OPCODE_MIN, .Name = "MIN",
+               .NumSrcRegs = 2, .HasDstReg = 1, .IsComponentwise = 1 },
+       [RC_OPCODE_MOV] = { .Opcode = RC_OPCODE_MOV, .Name = "MOV",
+               .NumSrcRegs = 1, .HasDstReg = 1, .IsComponentwise = 1 },
+       [RC_OPCODE_MUL] = { .Opcode = RC_OPCODE_MUL, .Name = "MUL",
+               .NumSrcRegs = 2, .HasDstReg = 1, .IsComponentwise = 1 },
+       [RC_OPCODE_POW] = { .Opcode = RC_OPCODE_POW, .Name = "POW",
+               .NumSrcRegs = 2, .HasDstReg = 1, .IsStandardScalar = 1 },
+       [RC_OPCODE_RCP] = { .Opcode = RC_OPCODE_RCP, .Name = "RCP",
+               .NumSrcRegs = 1, .HasDstReg = 1, .IsStandardScalar = 1 },
+       [RC_OPCODE_RSQ] = { .Opcode = RC_OPCODE_RSQ, .Name = "RSQ",
+               .NumSrcRegs = 1, .HasDstReg = 1, .IsStandardScalar = 1 },
+       [RC_OPCODE_SCS] = { .Opcode = RC_OPCODE_SCS, .Name = "SCS",
+               .NumSrcRegs = 1, .HasDstReg = 1 },
+       [RC_OPCODE_SEQ] = { .Opcode = RC_OPCODE_SEQ, .Name = "SEQ",
+               .NumSrcRegs = 2, .HasDstReg = 1, .IsComponentwise = 1 },
+       [RC_OPCODE_SFL] = { .Opcode = RC_OPCODE_SFL, .Name = "SFL",
+               .NumSrcRegs = 0, .HasDstReg = 1, .IsComponentwise = 1 },
+       [RC_OPCODE_SGE] = { .Opcode = RC_OPCODE_SGE, .Name = "SGE",
+               .NumSrcRegs = 2, .HasDstReg = 1, .IsComponentwise = 1 },
+       [RC_OPCODE_SGT] = { .Opcode = RC_OPCODE_SGT, .Name = "SGT",
+               .NumSrcRegs = 2, .HasDstReg = 1, .IsComponentwise = 1 },
+       [RC_OPCODE_SIN] = { .Opcode = RC_OPCODE_SIN, .Name = "SIN",
+               .NumSrcRegs = 1, .HasDstReg = 1, .IsStandardScalar = 1 },
+       [RC_OPCODE_SLE] = { .Opcode = RC_OPCODE_SLE, .Name = "SLE",
+               .NumSrcRegs = 2, .HasDstReg = 1, .IsComponentwise = 1 },
+       [RC_OPCODE_SLT] = { .Opcode = RC_OPCODE_SLT, .Name = "SLT",
+               .NumSrcRegs = 2, .HasDstReg = 1, .IsComponentwise = 1 },
+       [RC_OPCODE_SNE] = { .Opcode = RC_OPCODE_SNE, .Name = "SNE",
+               .NumSrcRegs = 2, .HasDstReg = 1, .IsComponentwise = 1 },
+       [RC_OPCODE_SSG] = { .Opcode = RC_OPCODE_SSG, .Name = "SSG",
+               .NumSrcRegs = 1, .HasDstReg = 1, .IsComponentwise = 1 },
+       [RC_OPCODE_SUB] = { .Opcode = RC_OPCODE_SUB, .Name = "SUB",
+               .NumSrcRegs = 2, .HasDstReg = 1, .IsComponentwise = 1 },
+       [RC_OPCODE_SWZ] = { .Opcode = RC_OPCODE_SWZ, .Name = "SWZ",
+               .NumSrcRegs = 1, .HasDstReg = 1, .IsComponentwise = 1 },
+       [RC_OPCODE_XPD] = { .Opcode = RC_OPCODE_XPD, .Name = "XPD",
+               .NumSrcRegs = 2, .HasDstReg = 1 },
+
+       /* Texture instructions */
+       [RC_OPCODE_TEX] = { .Opcode = RC_OPCODE_TEX, .Name = "TEX",
+               .HasTexture = 1, .NumSrcRegs = 1, .HasDstReg = 1 },
+       [RC_OPCODE_TXB] = { .Opcode = RC_OPCODE_TXB, .Name = "TXB",
+               .HasTexture = 1, .NumSrcRegs = 1, .HasDstReg = 1 },
+       [RC_OPCODE_TXD] = { .Opcode = RC_OPCODE_TXD, .Name = "TXD",
+               .HasTexture = 1, .NumSrcRegs = 3, .HasDstReg = 1 },
+       [RC_OPCODE_TXL] = { .Opcode = RC_OPCODE_TXL, .Name = "TXL",
+               .HasTexture = 1, .NumSrcRegs = 1, .HasDstReg = 1 },
+       [RC_OPCODE_TXP] = { .Opcode = RC_OPCODE_TXP, .Name = "TXP",
+               .HasTexture = 1, .NumSrcRegs = 1, .HasDstReg = 1 },
+
+       /* Flow control */
+       [RC_OPCODE_IF] = { .Opcode = RC_OPCODE_IF, .Name = "IF",
+               .IsFlowControl = 1, .NumSrcRegs = 1 },
+       [RC_OPCODE_ELSE] = { .Opcode = RC_OPCODE_ELSE, .Name = "ELSE",
+               .IsFlowControl = 1, .NumSrcRegs = 0 },
+       [RC_OPCODE_ENDIF] = { .Opcode = RC_OPCODE_ENDIF, .Name = "ENDIF",
+               .IsFlowControl = 1, .NumSrcRegs = 0 },
+       [RC_OPCODE_BGNLOOP] = { .Opcode = RC_OPCODE_BGNLOOP, .Name = "BGNLOOP",
+               .IsFlowControl = 1, .NumSrcRegs = 0 },
+       [RC_OPCODE_BRK] = { .Opcode = RC_OPCODE_BRK, .Name = "BRK",
+               .IsFlowControl = 1, .NumSrcRegs = 0 },
+       [RC_OPCODE_ENDLOOP] = { .Opcode = RC_OPCODE_ENDLOOP, .Name = "ENDLOOP",
+               .IsFlowControl = 1, .NumSrcRegs = 0 },
+       [RC_OPCODE_CONT] = { .Opcode = RC_OPCODE_CONT, .Name = "CONT",
+               .IsFlowControl = 1, .NumSrcRegs = 0 },
+
+       /* Remaining special opcodes */
+       [RC_OPCODE_REPL_ALPHA] = { .Opcode = RC_OPCODE_REPL_ALPHA,
+               .Name = "REPL_ALPHA", .HasDstReg = 1 },
+       [RC_OPCODE_BEGIN_TEX] = { .Opcode = RC_OPCODE_BEGIN_TEX,
+               .Name = "BEGIN_TEX" },
+       [RC_OPCODE_KILP] = { .Opcode = RC_OPCODE_KILP, .Name = "KILP" }
+};
+
+/**
+ * Work out, for each source operand, which channels an instruction reads in
+ * order to produce the destination channels selected by \p writemask.
+ *
+ * \param inst      instruction to inspect
+ * \param writemask destination channels of interest
+ * \param srcmasks  output array of three masks, one per source operand;
+ *                  all three entries are overwritten
+ *
+ * KIL and IF get their source mask set even when \p writemask is zero.
+ * Opcodes that are neither componentwise, standard scalar, nor listed in the
+ * switch below contribute nothing further to the masks.
+ */
+void rc_compute_sources_for_writemask(
+               const struct rc_instruction *inst,
+               unsigned int writemask,
+               unsigned int *srcmasks)
+{
+       const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode);
+       unsigned int i;
+
+       for (i = 0; i < 3; ++i)
+               srcmasks[i] = 0;
+
+       /* Reads that happen regardless of the write mask. */
+       switch (info->Opcode) {
+       case RC_OPCODE_KIL:
+               srcmasks[0] = RC_MASK_XYZW;
+               break;
+       case RC_OPCODE_IF:
+               srcmasks[0] = RC_MASK_X;
+               break;
+       default:
+               break;
+       }
+
+       if (writemask == 0)
+               return;
+
+       /* Regular ALU opcodes: every declared source gets the same mask;
+        * componentwise takes precedence over standard scalar. */
+       if (info->IsComponentwise || info->IsStandardScalar) {
+               const unsigned int chans =
+                       info->IsComponentwise ? writemask : RC_MASK_X;
+
+               for (i = 0; i < info->NumSrcRegs; ++i)
+                       srcmasks[i] |= chans;
+               return;
+       }
+
+       /* Everything else is handled opcode by opcode. */
+       switch (info->Opcode) {
+       case RC_OPCODE_ARL:
+               srcmasks[0] |= RC_MASK_X;
+               break;
+
+       case RC_OPCODE_DP2:
+               srcmasks[0] |= RC_MASK_XY;
+               srcmasks[1] |= RC_MASK_XY;
+               break;
+
+       case RC_OPCODE_DP3:
+       case RC_OPCODE_XPD:
+               srcmasks[0] |= RC_MASK_XYZ;
+               srcmasks[1] |= RC_MASK_XYZ;
+               break;
+
+       case RC_OPCODE_DP4:
+               srcmasks[0] |= RC_MASK_XYZW;
+               srcmasks[1] |= RC_MASK_XYZW;
+               break;
+
+       case RC_OPCODE_DPH:
+               srcmasks[0] |= RC_MASK_XYZ;
+               srcmasks[1] |= RC_MASK_XYZW;
+               break;
+
+       case RC_OPCODE_TEX:
+       case RC_OPCODE_TXB:
+       case RC_OPCODE_TXP:
+       case RC_OPCODE_TXL: {
+               /* Coordinate channels depend on the texture target. */
+               unsigned int coords = 0;
+
+               switch (inst->U.I.TexSrcTarget) {
+               case RC_TEXTURE_1D:
+                       coords = RC_MASK_X;
+                       break;
+               case RC_TEXTURE_2D:
+               case RC_TEXTURE_RECT:
+               case RC_TEXTURE_1D_ARRAY:
+                       coords = RC_MASK_XY;
+                       break;
+               case RC_TEXTURE_3D:
+               case RC_TEXTURE_CUBE:
+               case RC_TEXTURE_2D_ARRAY:
+                       coords = RC_MASK_XYZ;
+                       break;
+               default:
+                       break;
+               }
+
+               /* TXB, TXP and TXL additionally read W. */
+               if (info->Opcode != RC_OPCODE_TEX)
+                       coords |= RC_MASK_W;
+
+               srcmasks[0] |= coords;
+               break;
+       }
+
+       case RC_OPCODE_TXD: {
+               /* `shared` is read from all three sources; `layer` is the
+                * extra channel read from source 0 for array targets. */
+               unsigned int shared = 0;
+               unsigned int layer = 0;
+
+               switch (inst->U.I.TexSrcTarget) {
+               case RC_TEXTURE_1D_ARRAY:
+                       layer = RC_MASK_Y;
+                       shared = RC_MASK_X;
+                       break;
+               case RC_TEXTURE_1D:
+                       shared = RC_MASK_X;
+                       break;
+               case RC_TEXTURE_2D_ARRAY:
+                       layer = RC_MASK_Z;
+                       shared = RC_MASK_XY;
+                       break;
+               case RC_TEXTURE_2D:
+               case RC_TEXTURE_RECT:
+                       shared = RC_MASK_XY;
+                       break;
+               case RC_TEXTURE_3D:
+               case RC_TEXTURE_CUBE:
+                       shared = RC_MASK_XYZ;
+                       break;
+               default:
+                       break;
+               }
+
+               srcmasks[0] |= layer | shared;
+               srcmasks[1] |= shared;
+               srcmasks[2] |= shared;
+               break;
+       }
+
+       case RC_OPCODE_DST:
+               srcmasks[0] |= RC_MASK_Y | RC_MASK_Z;
+               srcmasks[1] |= RC_MASK_Y | RC_MASK_W;
+               break;
+
+       case RC_OPCODE_EXP:
+       case RC_OPCODE_LOG:
+               srcmasks[0] |= RC_MASK_XY;
+               break;
+
+       case RC_OPCODE_LIT:
+               srcmasks[0] |= RC_MASK_X | RC_MASK_Y | RC_MASK_W;
+               break;
+
+       default:
+               break;
+       }
+}
diff --git a/src/gallium/drivers/r300/compiler/radeon_opcodes.h b/src/gallium/drivers/r300/compiler/radeon_opcodes.h
new file mode 100644 (file)
index 0000000..b586882
--- /dev/null
@@ -0,0 +1,263 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef RADEON_OPCODES_H
+#define RADEON_OPCODES_H
+
+#include <assert.h>
+
+/**
+ * Opcodes understood by the Radeon compiler.
+ */
+typedef enum {
+       RC_OPCODE_NOP = 0,
+       RC_OPCODE_ILLEGAL_OPCODE,
+
+       /** vec4 instruction: dst.c = abs(src0.c); */
+       RC_OPCODE_ABS,
+
+       /** vec4 instruction: dst.c = src0.c + src1.c; */
+       RC_OPCODE_ADD,
+
+       /** special instruction: load address register
+        * dst.x = floor(src.x), where dst must be an address register */
+       RC_OPCODE_ARL,
+
+       /** vec4 instruction: dst.c = ceil(src0.c) */
+       RC_OPCODE_CEIL,
+
+       /** vec4 instruction: dst.c = clamp(src0.c, src1.c, src2.c) */
+       RC_OPCODE_CLAMP,
+
+       /** vec4 instruction: dst.c = src0.c < 0.0 ? src1.c : src2.c */
+       RC_OPCODE_CMP,
+
+       /** vec4 instruction: dst.c = src2.c > 0.5 ? src0.c : src1.c */
+       RC_OPCODE_CND,
+
+       /** scalar instruction: dst = cos(src0.x) */
+       RC_OPCODE_COS,
+
+       /** special instruction: take vec4 partial derivative in X direction
+        * dst.c = d src0.c / dx */
+       RC_OPCODE_DDX,
+
+       /** special instruction: take vec4 partial derivative in Y direction
+        * dst.c = d src0.c / dy */
+       RC_OPCODE_DDY,
+
+       /** scalar instruction: dst = src0.x*src1.x + src0.y*src1.y */
+       RC_OPCODE_DP2,
+
+       /** scalar instruction: dst = src0.x*src1.x + src0.y*src1.y + src0.z*src1.z */
+       RC_OPCODE_DP3,
+
+       /** scalar instruction: dst = src0.x*src1.x + src0.y*src1.y + src0.z*src1.z + src0.w*src1.w */
+       RC_OPCODE_DP4,
+
+       /** scalar instruction: dst = src0.x*src1.x + src0.y*src1.y + src0.z*src1.z + src1.w */
+       RC_OPCODE_DPH,
+
+       /** special instruction, see ARB_fragment_program */
+       RC_OPCODE_DST,
+
+       /** scalar instruction: dst = 2**src0.x */
+       RC_OPCODE_EX2,
+
+       /** special instruction, see ARB_vertex_program */
+       RC_OPCODE_EXP,
+
+       /** vec4 instruction: dst.c = floor(src0.c) */
+       RC_OPCODE_FLR,
+
+       /** vec4 instruction: dst.c = src0.c - floor(src0.c) */
+       RC_OPCODE_FRC,
+
+       /** special instruction: stop execution if any component of src0 is negative */
+       RC_OPCODE_KIL,
+
+       /** scalar instruction: dst = log_2(src0.x) */
+       RC_OPCODE_LG2,
+
+       /** special instruction, see ARB_vertex_program */
+       RC_OPCODE_LIT,
+
+       /** special instruction, see ARB_vertex_program */
+       RC_OPCODE_LOG,
+
+       /** vec4 instruction: dst.c = src0.c*src1.c + (1 - src0.c)*src2.c */
+       RC_OPCODE_LRP,
+
+       /** vec4 instruction: dst.c = src0.c*src1.c + src2.c */
+       RC_OPCODE_MAD,
+
+       /** vec4 instruction: dst.c = max(src0.c, src1.c) */
+       RC_OPCODE_MAX,
+
+       /** vec4 instruction: dst.c = min(src0.c, src1.c) */
+       RC_OPCODE_MIN,
+
+       /** vec4 instruction: dst.c = src0.c */
+       RC_OPCODE_MOV,
+
+       /** vec4 instruction: dst.c = src0.c*src1.c */
+       RC_OPCODE_MUL,
+
+       /** scalar instruction: dst = src0.x ** src1.x */
+       RC_OPCODE_POW,
+
+       /** scalar instruction: dst = 1 / src0.x */
+       RC_OPCODE_RCP,
+
+       /** scalar instruction: dst = 1 / sqrt(src0.x) */
+       RC_OPCODE_RSQ,
+
+       /** special instruction, see ARB_fragment_program */
+       RC_OPCODE_SCS,
+
+       /** vec4 instruction: dst.c = (src0.c == src1.c) ? 1.0 : 0.0 */
+       RC_OPCODE_SEQ,
+
+       /** vec4 instruction: dst.c = 0.0 */
+       RC_OPCODE_SFL,
+
+       /** vec4 instruction: dst.c = (src0.c >= src1.c) ? 1.0 : 0.0 */
+       RC_OPCODE_SGE,
+
+       /** vec4 instruction: dst.c = (src0.c > src1.c) ? 1.0 : 0.0 */
+       RC_OPCODE_SGT,
+
+       /** scalar instruction: dst = sin(src0.x) */
+       RC_OPCODE_SIN,
+
+       /** vec4 instruction: dst.c = (src0.c <= src1.c) ? 1.0 : 0.0 */
+       RC_OPCODE_SLE,
+
+       /** vec4 instruction: dst.c = (src0.c < src1.c) ? 1.0 : 0.0 */
+       RC_OPCODE_SLT,
+
+       /** vec4 instruction: dst.c = (src0.c != src1.c) ? 1.0 : 0.0 */
+       RC_OPCODE_SNE,
+
+       /** vec4 instruction: dst.c = (src0.c < 0) ? -1 : ((src0.c > 0) ? 1 : 0) */
+       RC_OPCODE_SSG,
+
+       /** vec4 instruction: dst.c = src0.c - src1.c */
+       RC_OPCODE_SUB,
+
+       /** vec4 instruction: dst.c = src0.c */
+       RC_OPCODE_SWZ,
+
+       /** special instruction, see ARB_fragment_program */
+       RC_OPCODE_XPD,
+
+       /** texture instructions: these are the opcodes whose rc_opcodes[]
+        * entries set HasTexture. TXD takes three sources, the others one. */
+       RC_OPCODE_TEX,
+       RC_OPCODE_TXB,
+       RC_OPCODE_TXD,
+       RC_OPCODE_TXL,
+       RC_OPCODE_TXP,
+
+       /** branch instruction:
+        * If src0.x != 0.0, continue with the next instruction;
+        * otherwise, jump to matching RC_OPCODE_ELSE or RC_OPCODE_ENDIF.
+        */
+       RC_OPCODE_IF,
+
+       /** branch instruction: jump to matching RC_OPCODE_ENDIF */
+       RC_OPCODE_ELSE,
+
+       /** branch instruction: has no effect */
+       RC_OPCODE_ENDIF,
+       
+       /** loop instructions: flagged IsFlowControl with no sources in
+        * rc_opcodes[]. NOTE(review): presumably begin-loop / break /
+        * end-loop / continue -- exact semantics are not stated here. */
+       RC_OPCODE_BGNLOOP,
+
+       RC_OPCODE_BRK,
+
+       RC_OPCODE_ENDLOOP,
+
+       RC_OPCODE_CONT,
+
+       /** special instruction, used in R300-R500 fragment program pair instructions
+        * indicates that the result of the alpha operation shall be replicated
+        * across all other channels */
+       RC_OPCODE_REPL_ALPHA,
+
+       /** special instruction, used in R300-R500 fragment programs
+        * to indicate the start of a block of texture instructions that
+        * can run simultaneously. */
+       RC_OPCODE_BEGIN_TEX,
+
+       /** Stop execution of the shader (GLSL discard) */
+       RC_OPCODE_KILP,
+
+       /** Not an opcode: number of opcodes above. Used as the size of
+        * rc_opcodes[] and as the bound checked by rc_get_opcode_info(). */
+       MAX_RC_OPCODE
+} rc_opcode;
+
+
+/**
+ * Static properties of one opcode; one entry per rc_opcode value is stored
+ * in rc_opcodes[].
+ */
+struct rc_opcode_info {
+       /** The opcode this entry describes; rc_get_opcode_info() asserts that
+        * it equals the entry's index in rc_opcodes[]. */
+       rc_opcode Opcode;
+       /** Name string for the opcode (e.g. "MAD"). */
+       const char * Name;
+
+       /** true if the instruction reads from a texture.
+        *
+        * \note This is false for the KIL instruction, even though KIL is
+        * a texture instruction from a hardware point of view. */
+       unsigned int HasTexture:1;
+
+       /** number of source operands; a 2-bit field, so at most 3 */
+       unsigned int NumSrcRegs:2;
+       /** true if the instruction has a destination register */
+       unsigned int HasDstReg:1;
+
+       /** true if this instruction affects control flow */
+       unsigned int IsFlowControl:1;
+
+       /** true if this is a vector instruction that operates on components in parallel
+        * without any cross-component interaction */
+       unsigned int IsComponentwise:1;
+
+       /** true if this instruction sources only its operands X components
+        * to compute one result which is smeared across all output channels */
+       unsigned int IsStandardScalar:1;
+};
+
+/** Opcode property table with MAX_RC_OPCODE entries; look entries up through
+ * rc_get_opcode_info(), which indexes it by rc_opcode value. */
+extern struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE];
+
+/**
+ * Look up the static description of \p opcode in rc_opcodes[].
+ *
+ * Asserts that \p opcode is below MAX_RC_OPCODE and that the table entry at
+ * that index really describes \p opcode.
+ *
+ * \return pointer to the table entry
+ */
+static inline const struct rc_opcode_info * rc_get_opcode_info(rc_opcode opcode)
+{
+       const struct rc_opcode_info * entry;
+
+       assert((unsigned int)opcode < MAX_RC_OPCODE);
+
+       entry = rc_opcodes + opcode;
+       assert(entry->Opcode == opcode);
+
+       return entry;
+}
+
+struct rc_instruction;
+
+/**
+ * Compute which channels of each source operand \p inst reads in order to
+ * produce the destination channels selected by \p writemask.
+ *
+ * \param inst      instruction to inspect
+ * \param writemask destination channels of interest
+ * \param srcmasks  output; must have room for three masks (entries 0..2 are
+ *                  always overwritten), one per source operand
+ */
+void rc_compute_sources_for_writemask(
+               const struct rc_instruction *inst,
+               unsigned int writemask,
+               unsigned int *srcmasks);
+
+#endif /* RADEON_OPCODES_H */
diff --git a/src/gallium/drivers/r300/compiler/radeon_optimize.c b/src/gallium/drivers/r300/compiler/radeon_optimize.c
new file mode 100644 (file)
index 0000000..39dcb21
--- /dev/null
@@ -0,0 +1,700 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ * Copyright 2010 Tom Stellard <tstellar@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_dataflow.h"
+
+#include "radeon_compiler.h"
+#include "radeon_compiler_util.h"
+#include "radeon_swizzle.h"
+
+struct src_clobbered_reads_cb_data {
+       rc_register_file File;
+       unsigned int Index;
+       unsigned int Mask;
+       struct rc_reader_data * ReaderData;
+};
+
+typedef void (*rc_presub_replace_fn)(struct rc_instruction *,
+                                               struct rc_instruction *,
+                                               unsigned int);
+
+static struct rc_src_register chain_srcregs(struct rc_src_register outer, struct rc_src_register inner)
+{
+       struct rc_src_register combine;
+       combine.File = inner.File;
+       combine.Index = inner.Index;
+       combine.RelAddr = inner.RelAddr;
+       if (outer.Abs) {
+               combine.Abs = 1;
+               combine.Negate = outer.Negate;
+       } else {
+               combine.Abs = inner.Abs;
+               combine.Negate = swizzle_mask(outer.Swizzle, inner.Negate);
+               combine.Negate ^= outer.Negate;
+       }
+       combine.Swizzle = combine_swizzles(inner.Swizzle, outer.Swizzle);
+       return combine;
+}
+
+static void copy_propagate_scan_read(void * data, struct rc_instruction * inst,
+                                               struct rc_src_register * src)
+{
+       rc_register_file file = src->File;
+       struct rc_reader_data * reader_data = data;
+
+       if(!rc_inst_can_use_presub(inst,
+                               reader_data->Writer->U.I.PreSub.Opcode,
+                               rc_swizzle_to_writemask(src->Swizzle),
+                               src,
+                               &reader_data->Writer->U.I.PreSub.SrcReg[0],
+                               &reader_data->Writer->U.I.PreSub.SrcReg[1])) {
+               reader_data->Abort = 1;
+               return;
+       }
+
+       /* XXX This could probably be handled better. */
+       if (file == RC_FILE_ADDRESS) {
+               reader_data->Abort = 1;
+               return;
+       }
+
+       /* These instructions cannot read from the constants file.
+        * see radeonTransformTEX()
+        */
+       if(reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_TEMPORARY &&
+                       reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_INPUT &&
+                               (inst->U.I.Opcode == RC_OPCODE_TEX ||
+                               inst->U.I.Opcode == RC_OPCODE_TXB ||
+                               inst->U.I.Opcode == RC_OPCODE_TXP ||
+                               inst->U.I.Opcode == RC_OPCODE_TXD ||
+                               inst->U.I.Opcode == RC_OPCODE_TXL ||
+                               inst->U.I.Opcode == RC_OPCODE_KIL)){
+               reader_data->Abort = 1;
+               return;
+       }
+}
+
+static void src_clobbered_reads_cb(
+       void * data,
+       struct rc_instruction * inst,
+       struct rc_src_register * src)
+{
+       struct src_clobbered_reads_cb_data * sc_data = data;
+
+       if (src->File == sc_data->File
+           && src->Index == sc_data->Index
+           && (rc_swizzle_to_writemask(src->Swizzle) & sc_data->Mask)) {
+
+               sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW;
+       }
+
+       if (src->RelAddr && sc_data->File == RC_FILE_ADDRESS) {
+               sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW;
+       }
+}
+
+static void is_src_clobbered_scan_write(
+       void * data,
+       struct rc_instruction * inst,
+       rc_register_file file,
+       unsigned int index,
+       unsigned int mask)
+{
+       struct src_clobbered_reads_cb_data sc_data;
+       struct rc_reader_data * reader_data = data;
+       sc_data.File = file;
+       sc_data.Index = index;
+       sc_data.Mask = mask;
+       sc_data.ReaderData = reader_data;
+       rc_for_all_reads_src(reader_data->Writer,
+                                       src_clobbered_reads_cb, &sc_data);
+}
+
+static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * inst_mov)
+{
+       struct rc_reader_data reader_data;
+       unsigned int i;
+
+       if (inst_mov->U.I.DstReg.File != RC_FILE_TEMPORARY ||
+           inst_mov->U.I.WriteALUResult ||
+           inst_mov->U.I.SaturateMode)
+               return;
+
+       /* Get a list of all the readers of this MOV instruction. */
+       reader_data.ExitOnAbort = 1;
+       rc_get_readers(c, inst_mov, &reader_data,
+                      copy_propagate_scan_read, NULL,
+                      is_src_clobbered_scan_write);
+
+       if (reader_data.Abort || reader_data.ReaderCount == 0)
+               return;
+
+       /* Propagate the MOV instruction. */
+       for (i = 0; i < reader_data.ReaderCount; i++) {
+               struct rc_instruction * inst = reader_data.Readers[i].Inst;
+               *reader_data.Readers[i].U.I.Src = chain_srcregs(*reader_data.Readers[i].U.I.Src, inst_mov->U.I.SrcReg[0]);
+
+               if (inst_mov->U.I.SrcReg[0].File == RC_FILE_PRESUB)
+                       inst->U.I.PreSub = inst_mov->U.I.PreSub;
+       }
+
+       /* Finally, remove the original MOV instruction */
+       rc_remove_instruction(inst_mov);
+}
+
+/**
+ * Check whether every used channel of a RC_FILE_NONE source selects the
+ * same constant swizzle (0, 0.5 or 1) with the same negate bit.
+ */
+static int is_src_uniform_constant(struct rc_src_register src,
+               rc_swizzle * pswz, unsigned int * pnegate)
+{
+       int have_used = 0;
+
+       if (src.File != RC_FILE_NONE) {
+               *pswz = 0;
+               return 0;
+       }
+
+       for(unsigned int chan = 0; chan < 4; ++chan) {
+               unsigned int swz = GET_SWZ(src.Swizzle, chan);
+               if (swz < 4) {
+                       *pswz = 0;
+                       return 0;
+               }
+               if (swz == RC_SWIZZLE_UNUSED)
+                       continue;
+
+               if (!have_used) {
+                       *pswz = swz;
+                       *pnegate = GET_BIT(src.Negate, chan);
+                       have_used = 1;
+               } else {
+                       if (swz != *pswz || *pnegate != GET_BIT(src.Negate, chan)) {
+                               *pswz = 0;
+                               return 0;
+                       }
+               }
+       }
+
+       return 1;
+}
+
+static void constant_folding_mad(struct rc_instruction * inst)
+{
+       rc_swizzle swz = 0;
+       unsigned int negate= 0;
+
+       if (is_src_uniform_constant(inst->U.I.SrcReg[2], &swz, &negate)) {
+               if (swz == RC_SWIZZLE_ZERO) {
+                       inst->U.I.Opcode = RC_OPCODE_MUL;
+                       return;
+               }
+       }
+
+       if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
+               if (swz == RC_SWIZZLE_ONE) {
+                       inst->U.I.Opcode = RC_OPCODE_ADD;
+                       if (negate)
+                               inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
+                       inst->U.I.SrcReg[1] = inst->U.I.SrcReg[2];
+                       return;
+               } else if (swz == RC_SWIZZLE_ZERO) {
+                       inst->U.I.Opcode = RC_OPCODE_MOV;
+                       inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
+                       return;
+               }
+       }
+
+       if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
+               if (swz == RC_SWIZZLE_ONE) {
+                       inst->U.I.Opcode = RC_OPCODE_ADD;
+                       if (negate)
+                               inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW;
+                       inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
+                       return;
+               } else if (swz == RC_SWIZZLE_ZERO) {
+                       inst->U.I.Opcode = RC_OPCODE_MOV;
+                       inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
+                       return;
+               }
+       }
+}
+
+static void constant_folding_mul(struct rc_instruction * inst)
+{
+       rc_swizzle swz = 0;
+       unsigned int negate = 0;
+
+       if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
+               if (swz == RC_SWIZZLE_ONE) {
+                       inst->U.I.Opcode = RC_OPCODE_MOV;
+                       inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1];
+                       if (negate)
+                               inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
+                       return;
+               } else if (swz == RC_SWIZZLE_ZERO) {
+                       inst->U.I.Opcode = RC_OPCODE_MOV;
+                       inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
+                       return;
+               }
+       }
+
+       if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
+               if (swz == RC_SWIZZLE_ONE) {
+                       inst->U.I.Opcode = RC_OPCODE_MOV;
+                       if (negate)
+                               inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
+                       return;
+               } else if (swz == RC_SWIZZLE_ZERO) {
+                       inst->U.I.Opcode = RC_OPCODE_MOV;
+                       inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
+                       return;
+               }
+       }
+}
+
+static void constant_folding_add(struct rc_instruction * inst)
+{
+       rc_swizzle swz = 0;
+       unsigned int negate = 0;
+
+       if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
+               if (swz == RC_SWIZZLE_ZERO) {
+                       inst->U.I.Opcode = RC_OPCODE_MOV;
+                       inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1];
+                       return;
+               }
+       }
+
+       if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
+               if (swz == RC_SWIZZLE_ZERO) {
+                       inst->U.I.Opcode = RC_OPCODE_MOV;
+                       return;
+               }
+       }
+}
+
+/**
+ * Replace 0.0, 1.0 and 0.5 immediate constants by their
+ * respective swizzles. Simplify instructions like ADD dst, src, 0;
+ */
+static void constant_folding(struct radeon_compiler * c, struct rc_instruction * inst)
+{
+       const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+       unsigned int i;
+
+       /* Replace 0.0, 1.0 and 0.5 immediates by their explicit swizzles */
+       for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
+               struct rc_constant * constant;
+               struct rc_src_register newsrc;
+               int have_real_reference;
+               unsigned int chan;
+
+               /* If there are only 0, 0.5, 1, or _ swizzles, mark the source as a constant. */
+               for (chan = 0; chan < 4; ++chan)
+                       if (GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan) <= 3)
+                               break;
+               if (chan == 4) {
+                       inst->U.I.SrcReg[src].File = RC_FILE_NONE;
+                       continue;
+               }
+
+               /* Convert immediates to swizzles. */
+               if (inst->U.I.SrcReg[src].File != RC_FILE_CONSTANT ||
+                   inst->U.I.SrcReg[src].RelAddr ||
+                   inst->U.I.SrcReg[src].Index >= c->Program.Constants.Count)
+                       continue;
+
+               constant =
+                       &c->Program.Constants.Constants[inst->U.I.SrcReg[src].Index];
+
+               if (constant->Type != RC_CONSTANT_IMMEDIATE)
+                       continue;
+
+               newsrc = inst->U.I.SrcReg[src];
+               have_real_reference = 0;
+               for (chan = 0; chan < 4; ++chan) {
+                       unsigned int swz = GET_SWZ(newsrc.Swizzle, chan);
+                       unsigned int newswz;
+                       float imm;
+                       float baseimm;
+
+                       if (swz >= 4)
+                               continue;
+
+                       imm = constant->u.Immediate[swz];
+                       baseimm = imm;
+                       if (imm < 0.0)
+                               baseimm = -baseimm;
+
+                       if (baseimm == 0.0) {
+                               newswz = RC_SWIZZLE_ZERO;
+                       } else if (baseimm == 1.0) {
+                               newswz = RC_SWIZZLE_ONE;
+                       } else if (baseimm == 0.5 && c->has_half_swizzles) {
+                               newswz = RC_SWIZZLE_HALF;
+                       } else {
+                               have_real_reference = 1;
+                               continue;
+                       }
+
+                       SET_SWZ(newsrc.Swizzle, chan, newswz);
+                       if (imm < 0.0 && !newsrc.Abs)
+                               newsrc.Negate ^= 1 << chan;
+               }
+
+               if (!have_real_reference) {
+                       newsrc.File = RC_FILE_NONE;
+                       newsrc.Index = 0;
+               }
+
+               /* don't make the swizzle worse */
+               if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, newsrc) &&
+                   c->SwizzleCaps->IsNative(inst->U.I.Opcode, inst->U.I.SrcReg[src]))
+                       continue;
+
+               inst->U.I.SrcReg[src] = newsrc;
+       }
+
+       /* Simplify instructions based on constants */
+       if (inst->U.I.Opcode == RC_OPCODE_MAD)
+               constant_folding_mad(inst);
+
+       /* note: MAD can simplify to MUL or ADD */
+       if (inst->U.I.Opcode == RC_OPCODE_MUL)
+               constant_folding_mul(inst);
+       else if (inst->U.I.Opcode == RC_OPCODE_ADD)
+               constant_folding_add(inst);
+
+       /* In case this instruction has been converted, make sure all of the
+        * registers that are no longer used are empty. */
+       opcode = rc_get_opcode_info(inst->U.I.Opcode);
+       for(i = opcode->NumSrcRegs; i < 3; i++) {
+               memset(&inst->U.I.SrcReg[i], 0, sizeof(struct rc_src_register));
+       }
+}
+
+/**
+ * If src and dst use the same register, this function returns a writemask that
+ * indicates which components are read by src.  Otherwise zero is returned.
+ */
+static unsigned int src_reads_dst_mask(struct rc_src_register src,
+                                               struct rc_dst_register dst)
+{
+       if (dst.File != src.File || dst.Index != src.Index) {
+               return 0;
+       }
+       return rc_swizzle_to_writemask(src.Swizzle);
+}
+
+/* Return 1 if the source register has a constant swizzle (e.g. 0, 0.5, 1.0)
+ * in any of its channels.  Return 0 otherwise. */
+static int src_has_const_swz(struct rc_src_register src) {
+       int chan;
+       for(chan = 0; chan < 4; chan++) {
+               unsigned int swz = GET_SWZ(src.Swizzle, chan);
+               if (swz == RC_SWIZZLE_ZERO || swz == RC_SWIZZLE_HALF
+                                               || swz == RC_SWIZZLE_ONE) {
+                       return 1;
+               }
+       }
+       return 0;
+}
+
+static void presub_scan_read(
+       void * data,
+       struct rc_instruction * inst,
+       struct rc_src_register * src)
+{
+       struct rc_reader_data * reader_data = data;
+       rc_presubtract_op * presub_opcode = reader_data->CbData;
+
+       if (!rc_inst_can_use_presub(inst, *presub_opcode,
+                       reader_data->Writer->U.I.DstReg.WriteMask,
+                       src,
+                       &reader_data->Writer->U.I.SrcReg[0],
+                       &reader_data->Writer->U.I.SrcReg[1])) {
+               reader_data->Abort = 1;
+               return;
+       }
+}
+
+static int presub_helper(
+       struct radeon_compiler * c,
+       struct rc_instruction * inst_add,
+       rc_presubtract_op presub_opcode,
+       rc_presub_replace_fn presub_replace)
+{
+       struct rc_reader_data reader_data;
+       unsigned int i;
+       rc_presubtract_op cb_op = presub_opcode;
+
+       reader_data.CbData = &cb_op;
+       reader_data.ExitOnAbort = 1;
+       rc_get_readers(c, inst_add, &reader_data, presub_scan_read, NULL,
+                                               is_src_clobbered_scan_write);
+
+       if (reader_data.Abort || reader_data.ReaderCount == 0)
+               return 0;
+
+       for(i = 0; i < reader_data.ReaderCount; i++) {
+               unsigned int src_index;
+               struct rc_reader reader = reader_data.Readers[i];
+               const struct rc_opcode_info * info =
+                               rc_get_opcode_info(reader.Inst->U.I.Opcode);
+
+               for (src_index = 0; src_index < info->NumSrcRegs; src_index++) {
+                       if (&reader.Inst->U.I.SrcReg[src_index] == reader.U.I.Src)
+                               presub_replace(inst_add, reader.Inst, src_index);
+               }
+       }
+       return 1;
+}
+
+/* This function assumes that inst_add->U.I.SrcReg[0] and
+ * inst_add->U.I.SrcReg[1] aren't both negated. */
+static void presub_replace_add(
+       struct rc_instruction * inst_add,
+       struct rc_instruction * inst_reader,
+       unsigned int src_index)
+{
+       rc_presubtract_op presub_opcode;
+       if (inst_add->U.I.SrcReg[1].Negate || inst_add->U.I.SrcReg[0].Negate)
+               presub_opcode = RC_PRESUB_SUB;
+       else
+               presub_opcode = RC_PRESUB_ADD;
+
+       if (inst_add->U.I.SrcReg[1].Negate) {
+               inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1];
+               inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[0];
+       } else {
+               inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[0];
+               inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[1];
+       }
+       inst_reader->U.I.PreSub.SrcReg[0].Negate = 0;
+       inst_reader->U.I.PreSub.SrcReg[1].Negate = 0;
+       inst_reader->U.I.PreSub.Opcode = presub_opcode;
+       inst_reader->U.I.SrcReg[src_index] =
+                       chain_srcregs(inst_reader->U.I.SrcReg[src_index],
+                                       inst_reader->U.I.PreSub.SrcReg[0]);
+       inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB;
+       inst_reader->U.I.SrcReg[src_index].Index = presub_opcode;
+}
+
+static int is_presub_candidate(
+       struct radeon_compiler * c,
+       struct rc_instruction * inst)
+{
+       const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode);
+       unsigned int i;
+       unsigned int is_constant[2] = {0, 0};
+
+       assert(inst->U.I.Opcode == RC_OPCODE_ADD);
+
+       if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE
+                       || inst->U.I.SaturateMode
+                       || inst->U.I.WriteALUResult) {
+               return 0;
+       }
+
+       /* If both sources use a constant swizzle, then we can't convert it to
+        * a presubtract operation.  In fact for the ADD and SUB presubtract
+        * operations neither source can contain a constant swizzle.  This
+        * specific case is checked in peephole_add_presub_add() when
+        * we make sure the swizzles for both sources are equal, so we
+        * don't need to worry about it here. */
+       for (i = 0; i < 2; i++) {
+               int chan;
+               for (chan = 0; chan < 4; chan++) {
+                       rc_swizzle swz =
+                               get_swz(inst->U.I.SrcReg[i].Swizzle, chan);
+                       if (swz == RC_SWIZZLE_ONE
+                                       || swz == RC_SWIZZLE_ZERO
+                                       || swz == RC_SWIZZLE_HALF) {
+                               is_constant[i] = 1;
+                       }
+               }
+       }
+       if (is_constant[0] && is_constant[1])
+               return 0;
+
+       for(i = 0; i < info->NumSrcRegs; i++) {
+               struct rc_src_register src = inst->U.I.SrcReg[i];
+               if (src_reads_dst_mask(src, inst->U.I.DstReg))
+                       return 0;
+
+               src.File = RC_FILE_PRESUB;
+               if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, src))
+                       return 0;
+       }
+       return 1;
+}
+
+static int peephole_add_presub_add(
+       struct radeon_compiler * c,
+       struct rc_instruction * inst_add)
+{
+       unsigned dstmask = inst_add->U.I.DstReg.WriteMask;
+        unsigned src0_neg = inst_add->U.I.SrcReg[0].Negate & dstmask;
+        unsigned src1_neg = inst_add->U.I.SrcReg[1].Negate & dstmask;
+
+       if (inst_add->U.I.SrcReg[0].Swizzle != inst_add->U.I.SrcReg[1].Swizzle)
+               return 0;
+
+       /* src0 and src1 can't have absolute values */
+       if (inst_add->U.I.SrcReg[0].Abs || inst_add->U.I.SrcReg[1].Abs)
+               return 0;
+
+       /* presub_replace_add() assumes only one is negative */
+       if (inst_add->U.I.SrcReg[0].Negate && inst_add->U.I.SrcReg[1].Negate)
+               return 0;
+
+        /* if src0 is negated, its negate bits must cover every channel in dstmask */
+        if (inst_add->U.I.SrcReg[0].Negate && src0_neg != dstmask)
+               return 0;
+
+        /* if src1 is negated, its negate bits must cover every channel in dstmask */
+        if (inst_add->U.I.SrcReg[1].Negate && src1_neg != dstmask)
+               return 0;
+
+       if (!is_presub_candidate(c, inst_add))
+               return 0;
+
+       if (presub_helper(c, inst_add, RC_PRESUB_ADD, presub_replace_add)) {
+               rc_remove_instruction(inst_add);
+               return 1;
+       }
+       return 0;
+}
+
+static void presub_replace_inv(
+       struct rc_instruction * inst_add,
+       struct rc_instruction * inst_reader,
+       unsigned int src_index)
+{
+       /* We must be careful not to modify inst_add, since it
+        * is possible it will remain part of the program.*/
+       inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1];
+       inst_reader->U.I.PreSub.SrcReg[0].Negate = 0;
+       inst_reader->U.I.PreSub.Opcode = RC_PRESUB_INV;
+       inst_reader->U.I.SrcReg[src_index] = chain_srcregs(inst_reader->U.I.SrcReg[src_index],
+                                               inst_reader->U.I.PreSub.SrcReg[0]);
+
+       inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB;
+       inst_reader->U.I.SrcReg[src_index].Index = RC_PRESUB_INV;
+}
+
+/**
+ * PRESUB_INV: ADD TEMP[0], none.1, -TEMP[1]
+ * Use the presubtract 1 - src0 for all readers of TEMP[0].  The first source
+ * of the add instruction must have the constant 1 swizzle.  This function
+ * does not check const registers to see if their value is 1.0, so it should
+ * be called after the constant_folding optimization.
+ * @return
+ *     0 if the ADD instruction is still part of the program.
+ *     1 if the ADD instruction is no longer part of the program.
+ */
+static int peephole_add_presub_inv(
+       struct radeon_compiler * c,
+       struct rc_instruction * inst_add)
+{
+       unsigned int i, swz;
+
+       if (!is_presub_candidate(c, inst_add))
+               return 0;
+
+       /* Check if src0 is 1. */
+       /* XXX It would be nice to use is_src_uniform_constant here, but that
+        * function only works if the register's file is RC_FILE_NONE */
+       for(i = 0; i < 4; i++ ) {
+               swz = GET_SWZ(inst_add->U.I.SrcReg[0].Swizzle, i);
+               if(((1 << i) & inst_add->U.I.DstReg.WriteMask)
+                                               && swz != RC_SWIZZLE_ONE) {
+                       return 0;
+               }
+       }
+
+       /* Check src1. */
+       if ((inst_add->U.I.SrcReg[1].Negate & inst_add->U.I.DstReg.WriteMask) !=
+                                               inst_add->U.I.DstReg.WriteMask
+               || inst_add->U.I.SrcReg[1].Abs
+               || (inst_add->U.I.SrcReg[1].File != RC_FILE_TEMPORARY
+                       && inst_add->U.I.SrcReg[1].File != RC_FILE_CONSTANT)
+               || src_has_const_swz(inst_add->U.I.SrcReg[1])) {
+
+               return 0;
+       }
+
+       if (presub_helper(c, inst_add, RC_PRESUB_INV, presub_replace_inv)) {
+               rc_remove_instruction(inst_add);
+               return 1;
+       }
+       return 0;
+}
+
+/**
+ * @return
+ *     0 if inst is still part of the program.
+ *     1 if inst is no longer part of the program.
+ */
+static int peephole(struct radeon_compiler * c, struct rc_instruction * inst)
+{
+       switch(inst->U.I.Opcode){
+       case RC_OPCODE_ADD:
+               if (c->has_presub) {
+                       if(peephole_add_presub_inv(c, inst))
+                               return 1;
+                       if(peephole_add_presub_add(c, inst))
+                               return 1;
+               }
+               break;
+       default:
+               break;
+       }
+       return 0;
+}
+
+void rc_optimize(struct radeon_compiler * c, void *user)
+{
+       struct rc_instruction * inst = c->Program.Instructions.Next;
+       while(inst != &c->Program.Instructions) {
+               struct rc_instruction * cur = inst;
+               inst = inst->Next;
+
+               constant_folding(c, cur);
+
+               if(peephole(c, cur))
+                       continue;
+
+               if (cur->U.I.Opcode == RC_OPCODE_MOV) {
+                       copy_propagate(c, cur);
+                       /* cur may no longer be part of the program */
+               }
+       }
+}
diff --git a/src/gallium/drivers/r300/compiler/radeon_pair_dead_sources.c b/src/gallium/drivers/r300/compiler/radeon_pair_dead_sources.c
new file mode 100644 (file)
index 0000000..1e9a2c0
--- /dev/null
@@ -0,0 +1,62 @@
+
+#include "radeon_compiler.h"
+#include "radeon_compiler_util.h"
+#include "radeon_opcodes.h"
+#include "radeon_program_pair.h"
+
+static void mark_used_presub(struct rc_pair_sub_instruction * sub)
+{
+       if (sub->Src[RC_PAIR_PRESUB_SRC].Used) {
+               unsigned int presub_reg_count = rc_presubtract_src_reg_count(
+                                       sub->Src[RC_PAIR_PRESUB_SRC].Index);
+               unsigned int i;
+               for (i = 0; i < presub_reg_count; i++) {
+                       sub->Src[i].Used = 1;
+               }
+       }
+}
+
+static void mark_used(
+       struct rc_instruction * inst,
+       struct rc_pair_sub_instruction * sub)
+{
+       unsigned int i;
+       const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode);
+       for (i = 0; i < info->NumSrcRegs; i++) {
+               unsigned int src_type = rc_source_type_swz(sub->Arg[i].Swizzle);
+               if (src_type & RC_SOURCE_RGB) {
+                       inst->U.P.RGB.Src[sub->Arg[i].Source].Used = 1;
+               }
+
+               if (src_type & RC_SOURCE_ALPHA) {
+                       inst->U.P.Alpha.Src[sub->Arg[i].Source].Used = 1;
+               }
+       }
+}
+
+/**
+ * This pass finds sources that are not used by their instruction and marks
+ * them as unused. 
+ */
+void rc_pair_remove_dead_sources(struct radeon_compiler * c, void *user)
+{
+       struct rc_instruction * inst;
+       for (inst = c->Program.Instructions.Next;
+                                       inst != &c->Program.Instructions;
+                                       inst = inst->Next) {
+               unsigned int i;
+               if (inst->Type == RC_INSTRUCTION_NORMAL)
+                       continue;
+
+               /* Mark all sources as unused */
+               for (i = 0; i < 4; i++) {
+                       inst->U.P.RGB.Src[i].Used = 0;
+                       inst->U.P.Alpha.Src[i].Used = 0;
+               }
+               mark_used(inst, &inst->U.P.RGB);
+               mark_used(inst, &inst->U.P.Alpha);
+
+               mark_used_presub(&inst->U.P.RGB);
+               mark_used_presub(&inst->U.P.Alpha);
+       }
+}
diff --git a/src/gallium/drivers/r300/compiler/radeon_pair_regalloc.c b/src/gallium/drivers/r300/compiler/radeon_pair_regalloc.c
new file mode 100644 (file)
index 0000000..49983d6
--- /dev/null
@@ -0,0 +1,706 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ * Copyright 2011 Tom Stellard <tstellar@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_program_pair.h"
+
+#include <stdio.h>
+
+#include "main/glheader.h"
+#include "program/register_allocate.h"
+#include "ralloc.h"
+
+#include "r300_fragprog_swizzle.h"
+#include "radeon_compiler.h"
+#include "radeon_compiler_util.h"
+#include "radeon_dataflow.h"
+#include "radeon_list.h"
+#include "radeon_variable.h"
+
+#define VERBOSE 0
+
+#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)
+
+
+
+/** Per-register bookkeeping kept by the register allocation pass. */
+struct register_info {
+       /** Live interval of each of the four channels (index 0..3). */
+       struct live_intervals Live[4];
+
+       /** Set by scan_read_callback() when the register is read. */
+       unsigned int Used:1;
+       /** Set together with File/Index once a location has been assigned;
+        * remap_register() only rewrites registers that have it set. */
+       unsigned int Allocated:1;
+       /** Register file of the assigned location. */
+       unsigned int File:3;
+       /** Index of the assigned location. */
+       unsigned int Index:RC_REGISTER_INDEX_BITS;
+       /** Bit N is set when channel N has a used live interval (filled in
+        * for inputs by do_advanced_regalloc()). */
+       unsigned int Writemask;
+};
+
+/** State shared by the register allocation pass and its callbacks. */
+struct regalloc_state {
+       /** Compiler whose program is being processed. */
+       struct radeon_compiler * C;
+
+       /** Array of NumInputs entries, indexed by input register index. */
+       struct register_info * Input;
+       unsigned int NumInputs;
+
+       /** Array of NumTemporaries entries, indexed by temporary index. */
+       struct register_info * Temporary;
+       unsigned int NumTemporaries;
+
+       /** Non-zero when the inputs-only allocator is in use; only then does
+        * remap_register() rewrite temporaries as well as inputs. */
+       unsigned int Simple;
+       /** Largest IP of a loop end seen so far; scan_read_callback() keeps
+        * inputs alive at least until this point. */
+       int LoopEnd;
+};
+
+/**
+ * Register classes known to the allocator.  The writemasks that belong to
+ * each class are listed in the rc_class_list table of
+ * do_advanced_regalloc().
+ */
+enum rc_reg_class {
+       RC_REG_CLASS_SINGLE,
+       RC_REG_CLASS_DOUBLE,
+       RC_REG_CLASS_TRIPLE,
+       RC_REG_CLASS_ALPHA,
+       RC_REG_CLASS_SINGLE_PLUS_ALPHA,
+       RC_REG_CLASS_DOUBLE_PLUS_ALPHA,
+       RC_REG_CLASS_TRIPLE_PLUS_ALPHA,
+       RC_REG_CLASS_X,
+       RC_REG_CLASS_Y,
+       RC_REG_CLASS_Z,
+       RC_REG_CLASS_XY,
+       RC_REG_CLASS_YZ,
+       RC_REG_CLASS_XZ,
+       RC_REG_CLASS_XW,
+       RC_REG_CLASS_YW,
+       RC_REG_CLASS_ZW,
+       RC_REG_CLASS_XYW,
+       RC_REG_CLASS_YZW,
+       RC_REG_CLASS_XZW,
+       /** Number of classes; used as the loop bound over class tables. */
+       RC_REG_CLASS_COUNT
+};
+
+/** Description of one register class and its state inside ra. */
+struct rc_class {
+       /** Which register class this entry describes. */
+       enum rc_reg_class Class;
+
+       /** Number of entries of Writemasks that are handed to ra for this
+        * class. */
+       unsigned int WritemaskCount;
+
+       /** This is 1 if this class is being used by the register allocator
+        * and 0 otherwise */
+       unsigned int Used;
+
+       /** This is the ID number assigned to this class by ra. */
+       unsigned int Id;
+
+       /** List of writemasks that belong to this class */
+       unsigned int Writemasks[3];
+
+
+};
+
+/* Debug helper: print an interval as "(start,end)", or "(null)" when the
+ * pointer is NULL or the interval is not in use. */
+static void print_live_intervals(struct live_intervals * src)
+{
+       if (src && src->Used) {
+               DBG("(%i,%i)", src->Start, src->End);
+       } else {
+               DBG("(null)");
+       }
+}
+
+/**
+ * Test whether two live intervals overlap.
+ *
+ * @return 1 if both intervals are in use and overlap, 0 otherwise.
+ */
+static int overlap_live_intervals(struct live_intervals * a, struct live_intervals * b)
+{
+       int overlap;
+
+       if (VERBOSE) {
+               DBG("overlap_live_intervals: ");
+               print_live_intervals(a);
+               DBG(" to ");
+               print_live_intervals(b);
+               DBG("\n");
+       }
+
+       if (!(a->Used && b->Used)) {
+               DBG("    unused interval\n");
+               return 0;
+       }
+
+       if (a->Start == b->Start) {
+               /* Same start: they collide unless one of them is empty. */
+               overlap = (a->Start != a->End && b->Start != b->End);
+       } else if (a->Start > b->Start) {
+               /* a begins later: it overlaps if b has not ended yet. */
+               overlap = (a->Start < b->End);
+       } else {
+               /* b begins later: it overlaps if a has not ended yet. */
+               overlap = (b->Start < a->End);
+       }
+
+       if (overlap) {
+               DBG("    overlap\n");
+               return 1;
+       }
+
+       DBG("    no overlap\n");
+       return 0;
+}
+
+/**
+ * Read callback that records the live interval of input registers.
+ *
+ * Every channel of an input selected by \p mask is marked live from 0 up to
+ * the larger of the instruction's IP and the recorded loop end.  Reads from
+ * other register files are ignored.
+ */
+static void scan_read_callback(void * data, struct rc_instruction * inst,
+               rc_register_file file, unsigned int index, unsigned int mask)
+{
+       struct regalloc_state * s = data;
+       struct register_info * input;
+       unsigned int chan;
+
+       if (file != RC_FILE_INPUT)
+               return;
+
+       input = &s->Input[index];
+       input->Used = 1;
+
+       for (chan = 0; chan < 4; chan++) {
+               if (mask & (1u << chan)) {
+                       input->Live[chan].Used = 1;
+                       input->Live[chan].Start = 0;
+                       input->Live[chan].End =
+                               s->LoopEnd > inst->IP ? s->LoopEnd : inst->IP;
+               }
+       }
+}
+
+/**
+ * Remap callback: replace \p *index with the allocated index of the
+ * register, if it has one.
+ *
+ * Inputs are always looked up; temporaries only when the simple allocator
+ * is active.  Any other register file is left alone.
+ */
+static void remap_register(void * data, struct rc_instruction * inst,
+               rc_register_file * file, unsigned int * index)
+{
+       struct regalloc_state * s = data;
+       const struct register_info * reg;
+
+       switch (*file) {
+       case RC_FILE_TEMPORARY:
+               if (!s->Simple)
+                       return;
+               reg = &s->Temporary[*index];
+               break;
+       case RC_FILE_INPUT:
+               reg = &s->Input[*index];
+               break;
+       default:
+               return;
+       }
+
+       if (!reg->Allocated)
+               return;
+
+       *index = reg->Index;
+}
+
+/**
+ * Callback that records hardware register \p hwreg as the location of
+ * input \p input.  Inputs outside the tracked range are ignored.
+ */
+static void alloc_input_simple(void * data, unsigned int input,
+                                                       unsigned int hwreg)
+{
+       struct regalloc_state * s = data;
+       struct register_info * reg;
+
+       if (input >= s->NumInputs)
+               return;
+
+       reg = &s->Input[input];
+       reg->Allocated = 1;
+       reg->File = RC_FILE_TEMPORARY;
+       reg->Index = hwreg;
+}
+
+/* This function offsets the temporary register indices by the number of
+ * input registers, because input registers are actually temporaries and
+ * should not occupy the same space.
+ *
+ * This pass is supposed to be used to maintain correct allocation of inputs
+ * if the standard register allocation is disabled. */
+static void do_regalloc_inputs_only(struct regalloc_state * s)
+{
+       const unsigned int base = s->NumInputs;
+       struct register_info * temp = s->Temporary;
+       unsigned int i;
+
+       for (i = 0; i < s->NumTemporaries; i++, temp++) {
+               temp->Allocated = 1;
+               temp->File = RC_FILE_TEMPORARY;
+               temp->Index = i + base;
+       }
+}
+
+/** Return 1 if \p op is RC_OPCODE_DDX or RC_OPCODE_DDY, 0 otherwise. */
+static unsigned int is_derivative(rc_opcode op)
+{
+       switch (op) {
+       case RC_OPCODE_DDX:
+       case RC_OPCODE_DDY:
+               return 1;
+       default:
+               return 0;
+       }
+}
+
+/**
+ * Look up the first class in \p classes that lists \p writemask.
+ *
+ * Classes with more than \p max_writemask_count writemasks are skipped.
+ * All three Writemasks slots of a candidate are compared, independently of
+ * its WritemaskCount.
+ *
+ * @return The index of the matching class in \p classes, or -1 if none.
+ */
+static int find_class(
+       struct rc_class * classes,
+       unsigned int writemask,
+       unsigned int max_writemask_count)
+{
+       unsigned int cls;
+
+       for (cls = 0; cls < RC_REG_CLASS_COUNT; cls++) {
+               const struct rc_class * candidate = &classes[cls];
+               unsigned int slot;
+
+               if (candidate->WritemaskCount <= max_writemask_count) {
+                       for (slot = 0; slot < 3; slot++) {
+                               if (candidate->Writemasks[slot] == writemask) {
+                                       return cls;
+                               }
+                       }
+               }
+       }
+       return -1;
+}
+
+/**
+ * Choose the register class for \p variable.
+ *
+ * The lookup starts from the summed writemask of the variable.  A class
+ * with several writemasks (which lets the allocator move the value to other
+ * channels) is only accepted while can_change_writemask is still set; the
+ * checks below clear it when changing the writemask is not acceptable.
+ *
+ * @param variable The variable to classify.
+ * @param classes Table of RC_REG_CLASS_COUNT classes to search.
+ * @return The class of the matching table entry.  When no class matches, an
+ * error is reported through rc_error() and 0 is returned.
+ */
+static enum rc_reg_class variable_get_class(
+       struct rc_variable * variable,
+       struct rc_class * classes)
+{
+       unsigned int i;
+       unsigned int can_change_writemask= 1;
+       unsigned int writemask = rc_variable_writemask_sum(variable);
+       struct rc_list * readers = rc_variable_readers_union(variable);
+       int class_index;
+
+       if (!variable->C->is_r500) {
+               struct rc_class c;
+               /* The assumption here is that if an instruction has type
+                * RC_INSTRUCTION_NORMAL then it is a TEX instruction.
+                * r300 and r400 can't swizzle the result of a TEX lookup. */
+               if (variable->Inst->Type == RC_INSTRUCTION_NORMAL) {
+                       writemask = RC_MASK_XYZW;
+               }
+
+               /* Check if it is possible to do swizzle packing for r300/r400
+                * without creating non-native swizzles. */
+               class_index = find_class(classes, writemask, 3);
+               if (class_index < 0) {
+                       /* Jumps into the else branch at the end of the
+                        * function, which reports the error. */
+                       goto error;
+               }
+               c = classes[class_index];
+               /* Try every writemask of the candidate class against every
+                * reader; a single non-native result forbids changing the
+                * writemask. */
+               for (i = 0; i < c.WritemaskCount; i++) {
+                       int j;
+                       unsigned int conversion_swizzle =
+                                               rc_make_conversion_swizzle(
+                                               writemask, c.Writemasks[i]);
+                       for (j = 0; j < variable->ReaderCount; j++) {
+                               unsigned int old_swizzle;
+                               unsigned int new_swizzle;
+                               struct rc_reader r = variable->Readers[j];
+                               if (r.Inst->Type == RC_INSTRUCTION_PAIR ) {
+                                       old_swizzle = r.U.P.Arg->Swizzle;
+                               } else {
+                                       old_swizzle = r.U.I.Src->Swizzle;
+                               }
+                               new_swizzle = rc_adjust_channels(
+                                       old_swizzle, conversion_swizzle);
+                               if (!r300_swizzle_is_native_basic(new_swizzle)) {
+                                       can_change_writemask = 0;
+                                       break;
+                               }
+                       }
+                       if (!can_change_writemask) {
+                               break;
+                       }
+               }
+       }
+
+       if (variable->Inst->Type == RC_INSTRUCTION_PAIR) {
+               /* DDX/DDY seem to always fail when their writemasks are
+                * changed.*/
+               if (is_derivative(variable->Inst->U.P.RGB.Opcode)
+                   || is_derivative(variable->Inst->U.P.Alpha.Opcode)) {
+                       can_change_writemask = 0;
+               }
+       }
+       for ( ; readers; readers = readers->Next) {
+               struct rc_reader * r = readers->Item;
+               if (r->Inst->Type == RC_INSTRUCTION_PAIR) {
+                       /* A reader that takes the value through the presub
+                        * source also forbids changing the writemask. */
+                       if (r->U.P.Arg->Source == RC_PAIR_PRESUB_SRC) {
+                               can_change_writemask = 0;
+                               break;
+                       }
+                       /* DDX/DDY also fail when their swizzles are changed. */
+                       if (is_derivative(r->Inst->U.P.RGB.Opcode)
+                           || is_derivative(r->Inst->U.P.Alpha.Opcode)) {
+                               can_change_writemask = 0;
+                               break;
+                       }
+               }
+       }
+
+       /* Final lookup: only single-writemask classes qualify when the
+        * writemask must stay as it is. */
+       class_index = find_class(classes, writemask,
+                                               can_change_writemask ? 3 : 1);
+       if (class_index > -1) {
+               return classes[class_index].Class;
+       } else {
+error:
+               rc_error(variable->C,
+                               "Could not find class for index=%u mask=%u\n",
+                               variable->Dst.Index, writemask);
+               return 0;
+       }
+}
+
+/**
+ * Compare every channel of \p a against every channel of \p b (both are
+ * arrays of four intervals).
+ *
+ * @return 1 as soon as any pair of channels overlaps, 0 otherwise.
+ */
+static unsigned int overlap_live_intervals_array(
+       struct live_intervals * a,
+       struct live_intervals * b)
+{
+       unsigned int pair;
+
+       /* pair / 4 walks the channels of a, pair % 4 those of b, in the same
+        * order as two nested loops over 0..3 would. */
+       for (pair = 0; pair < 4 * 4; pair++) {
+               if (overlap_live_intervals(&a[pair / 4], &b[pair % 4])) {
+                       return 1;
+               }
+       }
+       return 0;
+}
+
+/** Extract the register index from a register id (inverse of get_reg_id). */
+static unsigned int reg_get_index(int reg)
+{
+       const unsigned int index = reg / RC_MASK_XYZW;
+
+       return index;
+}
+
+/** Extract the writemask from a register id (inverse of get_reg_id). */
+static unsigned int reg_get_writemask(int reg)
+{
+       const unsigned int writemask = (reg % RC_MASK_XYZW) + 1;
+
+       return writemask;
+}
+
+/**
+ * Compute the register id of an (index, writemask) pair: each index owns
+ * RC_MASK_XYZW consecutive ids, one per non-empty writemask.
+ *
+ * An empty writemask trips the assert; where asserts are compiled out, 0 is
+ * returned instead.
+ */
+static int get_reg_id(unsigned int index, unsigned int writemask)
+{
+       assert(writemask);
+       if (!writemask) {
+               return 0;
+       }
+       return (writemask - 1) + (index * RC_MASK_XYZW);
+}
+
+#if VERBOSE
+/* Debug helper: print a register id as "Temp[index].xyzw", with '_' in
+ * place of every channel that is missing from the writemask. */
+static void print_reg(int reg)
+{
+       const unsigned int mask = reg_get_writemask(reg);
+       const unsigned int index = reg_get_index(reg);
+       const char x = (mask & RC_MASK_X) ? 'x' : '_';
+       const char y = (mask & RC_MASK_Y) ? 'y' : '_';
+       const char z = (mask & RC_MASK_Z) ? 'z' : '_';
+       const char w = (mask & RC_MASK_W) ? 'w' : '_';
+
+       fprintf(stderr, "Temp[%u].%c%c%c%c", index, x, y, z, w);
+}
+#endif
+
+/**
+ * Tell ra that, within the same register index, any two different
+ * writemasks sharing at least one channel conflict with each other.
+ */
+static void add_register_conflicts(
+       struct ra_regs * regs,
+       unsigned int max_temp_regs)
+{
+       unsigned int index, first, second;
+
+       for (index = 0; index < max_temp_regs; index++) {
+               for (first = 1; first <= RC_MASK_XYZW; first++) {
+                       /* Each unordered pair of masks is visited once. */
+                       for (second = first + 1; second <= RC_MASK_XYZW;
+                                                               second++) {
+                               if (!(first & second)) {
+                                       continue;
+                               }
+                               ra_add_reg_conflict(regs,
+                                       get_reg_id(index, first),
+                                       get_reg_id(index, second));
+                       }
+               }
+       }
+}
+
+/**
+ * Full register allocator, built on top of the ra graph-coloring allocator.
+ *
+ * The steps are:
+ *  - create a ra register for every (index, writemask) combination of the
+ *    max_temp_regs hardware temporaries;
+ *  - pick a register class for every program variable;
+ *  - compute the live intervals of the input registers and give each used
+ *    input its own single-register class, pinned to its current location;
+ *  - build the interference graph from overlapping live intervals;
+ *  - run ra and rewrite the destination of every variable.
+ *
+ * If ra cannot allocate without spilling, an error is reported through
+ * rc_error() and the program is left unmodified.  The ra structures are
+ * released on both the success and the failure path.
+ */
+static void do_advanced_regalloc(struct regalloc_state * s)
+{
+       struct rc_class rc_class_list [] = {
+               {RC_REG_CLASS_SINGLE, 3, 0, 0,
+                       {RC_MASK_X,
+                        RC_MASK_Y,
+                        RC_MASK_Z}},
+               {RC_REG_CLASS_DOUBLE, 3, 0, 0,
+                       {RC_MASK_X | RC_MASK_Y,
+                        RC_MASK_X | RC_MASK_Z,
+                        RC_MASK_Y | RC_MASK_Z}},
+               {RC_REG_CLASS_TRIPLE, 1, 0, 0,
+                       {RC_MASK_X | RC_MASK_Y | RC_MASK_Z,
+                        RC_MASK_NONE,
+                        RC_MASK_NONE}},
+               {RC_REG_CLASS_ALPHA, 1, 0, 0,
+                       {RC_MASK_W,
+                        RC_MASK_NONE,
+                        RC_MASK_NONE}},
+               {RC_REG_CLASS_SINGLE_PLUS_ALPHA, 3, 0, 0,
+                       {RC_MASK_X | RC_MASK_W,
+                        RC_MASK_Y | RC_MASK_W,
+                        RC_MASK_Z | RC_MASK_W}},
+               {RC_REG_CLASS_DOUBLE_PLUS_ALPHA, 3, 0, 0,
+                       {RC_MASK_X | RC_MASK_Y | RC_MASK_W,
+                        RC_MASK_X | RC_MASK_Z | RC_MASK_W,
+                        RC_MASK_Y | RC_MASK_Z | RC_MASK_W}},
+               {RC_REG_CLASS_TRIPLE_PLUS_ALPHA, 1, 0, 0,
+                       {RC_MASK_X | RC_MASK_Y | RC_MASK_Z | RC_MASK_W,
+                       RC_MASK_NONE,
+                       RC_MASK_NONE}},
+               {RC_REG_CLASS_X, 1, 0, 0,
+                       {RC_MASK_X,
+                       RC_MASK_NONE,
+                       RC_MASK_NONE}},
+               {RC_REG_CLASS_Y, 1, 0, 0,
+                       {RC_MASK_Y,
+                       RC_MASK_NONE,
+                       RC_MASK_NONE}},
+               {RC_REG_CLASS_Z, 1, 0, 0,
+                       {RC_MASK_Z,
+                       RC_MASK_NONE,
+                       RC_MASK_NONE}},
+               {RC_REG_CLASS_XY, 1, 0, 0,
+                       {RC_MASK_X | RC_MASK_Y,
+                       RC_MASK_NONE,
+                       RC_MASK_NONE}},
+               {RC_REG_CLASS_YZ, 1, 0, 0,
+                       {RC_MASK_Y | RC_MASK_Z,
+                       RC_MASK_NONE,
+                       RC_MASK_NONE}},
+               {RC_REG_CLASS_XZ, 1, 0, 0,
+                       {RC_MASK_X | RC_MASK_Z,
+                       RC_MASK_NONE,
+                       RC_MASK_NONE}},
+               {RC_REG_CLASS_XW, 1, 0, 0,
+                       {RC_MASK_X | RC_MASK_W,
+                       RC_MASK_NONE,
+                       RC_MASK_NONE}},
+               {RC_REG_CLASS_YW, 1, 0, 0,
+                       {RC_MASK_Y | RC_MASK_W,
+                       RC_MASK_NONE,
+                       RC_MASK_NONE}},
+               {RC_REG_CLASS_ZW, 1, 0, 0,
+                       {RC_MASK_Z | RC_MASK_W,
+                       RC_MASK_NONE,
+                       RC_MASK_NONE}},
+               {RC_REG_CLASS_XYW, 1, 0, 0,
+                       {RC_MASK_X | RC_MASK_Y | RC_MASK_W,
+                       RC_MASK_NONE,
+                       RC_MASK_NONE}},
+               {RC_REG_CLASS_YZW, 1, 0, 0,
+                       {RC_MASK_Y | RC_MASK_Z | RC_MASK_W,
+                       RC_MASK_NONE,
+                       RC_MASK_NONE}},
+               {RC_REG_CLASS_XZW, 1, 0, 0,
+                       {RC_MASK_X | RC_MASK_Z | RC_MASK_W,
+                       RC_MASK_NONE,
+                       RC_MASK_NONE}}
+       };
+
+       unsigned int i, j, index, input_node, node_count, node_index;
+       unsigned int * node_classes;
+       unsigned int * input_classes;
+       struct rc_instruction * inst;
+       struct rc_list * var_ptr;
+       struct rc_list * variables;
+       struct ra_regs * regs;
+       struct ra_graph * graph;
+
+       /* Allocate the main ra data structure */
+       regs = ra_alloc_reg_set(s->C->max_temp_regs * RC_MASK_XYZW);
+
+       /* Get list of program variables */
+       variables = rc_get_variables(s->C);
+       node_count = rc_list_count(variables);
+       node_classes = memory_pool_malloc(&s->C->Pool,
+                       node_count * sizeof(unsigned int));
+       input_classes = memory_pool_malloc(&s->C->Pool,
+                       s->NumInputs * sizeof(unsigned int));
+
+       for (var_ptr = variables, node_index = 0; var_ptr;
+                                       var_ptr = var_ptr->Next, node_index++) {
+               unsigned int class_index;
+               /* Compute the live intervals */
+               rc_variable_compute_live_intervals(var_ptr->Item);
+
+               class_index = variable_get_class(var_ptr->Item, rc_class_list);
+
+               /* If we haven't used this register class yet, mark it
+                * as used and allocate space for it. */
+               if (!rc_class_list[class_index].Used) {
+                       rc_class_list[class_index].Used = 1;
+                       rc_class_list[class_index].Id = ra_alloc_reg_class(regs);
+               }
+
+               node_classes[node_index] = rc_class_list[class_index].Id;
+       }
+
+
+       /* Assign registers to the classes */
+       for (i = 0; i < RC_REG_CLASS_COUNT; i++) {
+               struct rc_class class = rc_class_list[i];
+               if (!class.Used) {
+                       continue;
+               }
+
+               for (index = 0; index < s->C->max_temp_regs; index++) {
+                       for (j = 0; j < class.WritemaskCount; j++) {
+                               int reg_id = get_reg_id(index,
+                                                       class.Writemasks[j]);
+                               ra_class_add_reg(regs, class.Id, reg_id);
+                       }
+               }
+       }
+
+       /* Add register conflicts */
+       add_register_conflicts(regs, s->C->max_temp_regs);
+
+       /* Calculate live intervals for input registers */
+       for (inst = s->C->Program.Instructions.Next;
+                                       inst != &s->C->Program.Instructions;
+                                       inst = inst->Next) {
+               rc_opcode op = rc_get_flow_control_inst(inst);
+               if (op == RC_OPCODE_BGNLOOP) {
+                       /* Remember the furthest loop end so that inputs read
+                        * afterwards stay live at least until there. */
+                       struct rc_instruction * endloop =
+                                                       rc_match_bgnloop(inst);
+                       if (endloop->IP > s->LoopEnd) {
+                               s->LoopEnd = endloop->IP;
+                       }
+               }
+               rc_for_all_reads_mask(inst, scan_read_callback, s);
+       }
+
+       /* Create classes for input registers */
+       for (i = 0; i < s->NumInputs; i++) {
+               unsigned int chan, class_id, writemask = 0;
+               for (chan = 0; chan < 4; chan++) {
+                       if (s->Input[i].Live[chan].Used) {
+                               writemask |= (1 << chan);
+                       }
+               }
+               s->Input[i].Writemask = writemask;
+               /* Inputs that are never read get no class and no node;
+                * input_classes[i] stays unset for them and is never read. */
+               if (!writemask) {
+                       continue;
+               }
+
+               class_id = ra_alloc_reg_class(regs);
+               input_classes[i] = class_id;
+               ra_class_add_reg(regs, class_id,
+                               get_reg_id(s->Input[i].Index, writemask));
+       }
+
+       ra_set_finalize(regs);
+
+       /* Nodes [0, node_count) are the variables; the used inputs follow. */
+       graph = ra_alloc_interference_graph(regs, node_count + s->NumInputs);
+
+       /* Build the interference graph */
+       for (var_ptr = variables, node_index = 0; var_ptr;
+                                       var_ptr = var_ptr->Next,node_index++) {
+               struct rc_list * a, * b;
+               unsigned int b_index;
+
+               ra_set_node_class(graph, node_index, node_classes[node_index]);
+
+               for (a = var_ptr, b = var_ptr->Next, b_index = node_index + 1;
+                                               b; b = b->Next, b_index++) {
+                       /* Two nodes interfere if any variable of one Friend
+                        * chain overlaps any variable of the other. */
+                       struct rc_variable * var_a = a->Item;
+                       while (var_a) {
+                               struct rc_variable * var_b = b->Item;
+                               while (var_b) {
+                                       if (overlap_live_intervals_array(var_a->Live, var_b->Live)) {
+                                               ra_add_node_interference(graph,
+                                                       node_index, b_index);
+                                       }
+                                       var_b = var_b->Friend;
+                               }
+                               var_a = var_a->Friend;
+                       }
+               }
+       }
+
+       /* Add input registers to the interference graph */
+       for (i = 0, input_node = 0; i< s->NumInputs; i++) {
+               if (!s->Input[i].Writemask) {
+                       continue;
+               }
+               ra_set_node_class(graph, node_count + input_node,
+                                                       input_classes[i]);
+               for (var_ptr = variables, node_index = 0;
+                               var_ptr; var_ptr = var_ptr->Next, node_index++) {
+                       struct rc_variable * var = var_ptr->Item;
+                       if (overlap_live_intervals_array(s->Input[i].Live,
+                                                               var->Live)) {
+                               ra_add_node_interference(graph, node_index,
+                                               node_count + input_node);
+                       }
+               }
+               /* Manually allocate a register for this input */
+               ra_set_node_reg(graph, node_count + input_node, get_reg_id(
+                               s->Input[i].Index, s->Input[i].Writemask));
+               input_node++;
+       }
+
+       if (!ra_allocate_no_spills(graph)) {
+               rc_error(s->C, "Ran out of hardware temporaries\n");
+               /* Still release graph and regs; returning here directly
+                * would leak both. */
+               goto cleanup;
+       }
+
+       /* Rewrite the registers */
+       for (var_ptr = variables, node_index = 0; var_ptr;
+                               var_ptr = var_ptr->Next, node_index++) {
+               int reg = ra_get_node_reg(graph, node_index);
+               unsigned int writemask = reg_get_writemask(reg);
+               unsigned int index = reg_get_index(reg);
+               struct rc_variable * var = var_ptr->Item;
+
+               /* Mirrors variable_get_class(): on non-r500 the writemask of
+                * RC_INSTRUCTION_NORMAL instructions is kept as it was. */
+               if (!s->C->is_r500 && var->Inst->Type == RC_INSTRUCTION_NORMAL) {
+                       writemask = rc_variable_writemask_sum(var);
+               }
+
+               if (var->Dst.File == RC_FILE_INPUT) {
+                       continue;
+               }
+               rc_variable_change_dst(var, index, writemask);
+       }
+
+cleanup:
+       ralloc_free(graph);
+       ralloc_free(regs);
+}
+
+/**
+ * Register allocation pass for pair programs.
+ *
+ * @param cc The compiler; it is accessed as a
+ * r300_fragment_program_compiler to reach AllocateHwInputs.
+ * @param user This parameter should be a pointer to an integer value.  If this
+ * integer value is zero, then a simple register allocator will be used that
+ * only allocates space for input registers (\sa do_regalloc_inputs_only).  If
+ * it is non-zero, then the regular register allocator will be used
+ * (\sa do_advanced_regalloc).
+ */
+void rc_pair_regalloc(struct radeon_compiler *cc, void *user)
+{
+       struct r300_fragment_program_compiler *c =
+                               (struct r300_fragment_program_compiler*)cc;
+       int * do_full_regalloc = (int*)user;
+       struct rc_instruction *inst;
+       struct regalloc_state s;
+
+       memset(&s, 0, sizeof(s));
+       s.C = cc;
+
+       /* Zero-initialized bookkeeping for every input register... */
+       s.NumInputs = rc_get_max_index(cc, RC_FILE_INPUT) + 1;
+       s.Input = memory_pool_malloc(&cc->Pool,
+                       s.NumInputs * sizeof(*s.Input));
+       memset(s.Input, 0, s.NumInputs * sizeof(*s.Input));
+
+       /* ...and for every temporary register. */
+       s.NumTemporaries = rc_get_max_index(cc, RC_FILE_TEMPORARY) + 1;
+       s.Temporary = memory_pool_malloc(&cc->Pool,
+                       s.NumTemporaries * sizeof(*s.Temporary));
+       memset(s.Temporary, 0, s.NumTemporaries * sizeof(*s.Temporary));
+
+       rc_recompute_ips(cc);
+
+       /* Inputs get their hardware location first, through the callback. */
+       c->AllocateHwInputs(c, alloc_input_simple, &s);
+
+       if (!*do_full_regalloc) {
+               s.Simple = 1;
+               do_regalloc_inputs_only(&s);
+       } else {
+               do_advanced_regalloc(&s);
+       }
+
+       /* Rewrite inputs and if we are doing the simple allocation, rewrite
+        * temporaries too. */
+       for (inst = cc->Program.Instructions.Next;
+                                       inst != &cc->Program.Instructions;
+                                       inst = inst->Next) {
+               rc_remap_registers(inst, remap_register, &s);
+       }
+}
diff --git a/src/gallium/drivers/r300/compiler/radeon_pair_schedule.c b/src/gallium/drivers/r300/compiler/radeon_pair_schedule.c
new file mode 100644 (file)
index 0000000..25cd52c
--- /dev/null
@@ -0,0 +1,1010 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_program_pair.h"
+
+#include <stdio.h>
+
+#include "radeon_compiler.h"
+#include "radeon_compiler_util.h"
+#include "radeon_dataflow.h"
+
+
+#define VERBOSE 0 /* Compile-time switch: a non-zero value makes DBG() print. */
+
+#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0) /* printf-style trace to stderr, gated on VERBOSE */
+
+struct schedule_instruction { /* Scheduler bookkeeping wrapped around one rc_instruction. */
+       struct rc_instruction * Instruction;
+
+       /** Next instruction in the linked list of ready instructions. */
+       struct schedule_instruction *NextReady;
+
+       /** Values that this instruction reads and writes.
+        * NumWriteValues / NumReadValues count the leading entries of the
+        * two arrays that are in use; scan_read() reports an error instead
+        * of storing a 13th read value. */
+       struct reg_value * WriteValues[4];
+       struct reg_value * ReadValues[12];
+       unsigned int NumWriteValues:3;
+       unsigned int NumReadValues:4;
+
+       /**
+        * Number of (read and write) dependencies that must be resolved before
+        * this instruction can be scheduled.  decrease_dependencies() hands
+        * the instruction to instruction_ready() once this reaches zero.
+        */
+       unsigned int NumDependencies:5;
+
+       /** List of all readers (see rc_get_readers() for the definition of
+        * "all readers"), even those outside the basic block this instruction
+        * lives in. */
+       struct rc_reader_data GlobalReaders;
+};
+
+
+/**
+ * Used to keep track of which instructions read a value.
+ *
+ * Nodes form the singly linked list headed by reg_value::Readers; scan_read()
+ * pushes a new node onto the front of that list for every read it records.
+ */
+struct reg_value_reader {
+       struct schedule_instruction *Reader;
+       struct reg_value_reader *Next;
+};
+
+/**
+ * Used to keep track which values are stored in each component of a
+ * RC_FILE_TEMPORARY.
+ */
+struct reg_value {
+       struct schedule_instruction * Writer; /* left zero by scan_read() for a value that was first seen as a read */
+
+       /**
+        * Unordered linked list of instructions that read from this value.
+        * When this value becomes available (its writer is committed), we
+        * decrease all readers' dependency count.
+        */
+       struct reg_value_reader *Readers;
+
+       /**
+        * Number of readers of this value. This is decremented each time
+        * a reader of the value is committed.
+        * When the reader count reaches zero, the dependency count
+        * of the instruction writing \ref Next is decremented.
+        */
+       unsigned int NumReaders;
+
+       struct reg_value *Next; /**< Pointer to the next value to be written to the same register */
+};
+
+struct register_state { /* Tracked value for each of the four channels of one register; indexed by chan in get_reg_valuep(). */
+       struct reg_value * Values[4];
+};
+
+/**
+ * Linked-list node describing a move of one register component from
+ * (OldIndex, OldSwizzle) to (NewIndex, NewSwizzle).
+ *
+ * NOTE(review): nothing in the visible part of this file uses this struct,
+ * so the exact meaning of Inst and OnlyTexReads should be confirmed against
+ * its users.
+ */
+struct remap_reg {
+       /* Was "struct rc_instruciont *": a typo that silently declared a new
+        * incomplete type instead of referring to struct rc_instruction. */
+       struct rc_instruction * Inst;
+       unsigned int OldIndex:(RC_REGISTER_INDEX_BITS+1);
+       unsigned int OldSwizzle:3;
+       unsigned int NewIndex:(RC_REGISTER_INDEX_BITS+1);
+       unsigned int NewSwizzle:3;
+       unsigned int OnlyTexReads:1;
+       struct remap_reg * Next;
+};
+
+struct schedule_state { /* Working state shared by the scheduling helpers in this file. */
+       struct radeon_compiler * C;
+       struct schedule_instruction * Current; /* instruction whose operands scan_read() is currently recording */
+
+       struct register_state Temporary[RC_REGISTER_MAX_INDEX]; /* per-temporary value tracking, accessed through get_reg_valuep() */
+
+       /**
+        * Linked lists of instructions that can be scheduled right now,
+        * based on which ALU/TEX resources they require.
+        * instruction_ready() decides which list an instruction goes on.
+        */
+       /*@{*/
+       struct schedule_instruction *ReadyFullALU;
+       struct schedule_instruction *ReadyRGB;
+       struct schedule_instruction *ReadyAlpha;
+       struct schedule_instruction *ReadyTEX;
+       /*@}*/
+};
+
+/**
+ * Look up the slot that tracks the current value of one register channel.
+ *
+ * Only RC_FILE_TEMPORARY is tracked.  Returns 0 for any other file and,
+ * after reporting a compiler error, for an out-of-range index.  chan is
+ * not range-checked here.
+ */
+static struct reg_value ** get_reg_valuep(struct schedule_state * s,
+               rc_register_file file, unsigned int index, unsigned int chan)
+{
+       if (file == RC_FILE_TEMPORARY) {
+               if (index < RC_REGISTER_MAX_INDEX)
+                       return &s->Temporary[index].Values[chan];
+
+               rc_error(s->C, "%s: index %i out of bounds\n", __FUNCTION__, index);
+       }
+
+       return 0;
+}
+
+static void add_inst_to_list(struct schedule_instruction ** list, struct schedule_instruction * inst)
+{      /* Push inst onto the front of the singly linked ready list *list. */
+       inst->NextReady = *list;
+       *list = inst;
+}
+
+/**
+ * Append inst behind the last element of the ready list *list.
+ *
+ * inst->NextReady is left as the caller set it.
+ */
+static void add_inst_to_list_end(struct schedule_instruction ** list,
+                                       struct schedule_instruction * inst)
+{
+       struct schedule_instruction ** link = list;
+
+       /* Walk to the first empty link: either the list head itself or the
+        * NextReady pointer of the current tail. */
+       while (*link)
+               link = &(*link)->NextReady;
+
+       *link = inst;
+}
+
+/**
+ * Put sinst on the ready list that matches the resources it needs:
+ * TEX, RGB only, alpha only, or the full ALU.
+ */
+static void instruction_ready(struct schedule_state * s, struct schedule_instruction * sinst)
+{
+       struct rc_instruction * inst = sinst->Instruction;
+       struct schedule_instruction ** alu_list;
+
+       DBG("%i is now ready\n", sinst->Instruction->IP);
+
+       if (inst->Type == RC_INSTRUCTION_NORMAL) {
+               /* Ready TEX instructions go to the end of their list, which
+                * helps us emit TEX instructions in blocks without losing
+                * our place. */
+               add_inst_to_list_end(&s->ReadyTEX, sinst);
+               return;
+       }
+
+       /* A pair instruction with one half set to NOP only needs the other
+        * half of the ALU. */
+       if (inst->U.P.Alpha.Opcode == RC_OPCODE_NOP)
+               alu_list = &s->ReadyRGB;
+       else if (inst->U.P.RGB.Opcode == RC_OPCODE_NOP)
+               alu_list = &s->ReadyAlpha;
+       else
+               alu_list = &s->ReadyFullALU;
+
+       add_inst_to_list(alu_list, sinst);
+}
+
+/**
+ * Resolve one dependency of sinst; when none remain, mark it as ready.
+ */
+static void decrease_dependencies(struct schedule_state * s, struct schedule_instruction * sinst)
+{
+       assert(sinst->NumDependencies > 0);
+
+       sinst->NumDependencies -= 1;
+       if (sinst->NumDependencies == 0)
+               instruction_ready(s, sinst);
+}
+
+/**
+ * Account for sinst having consumed each of the values it reads.
+ *
+ * Once the last outstanding reader of a value is gone, the instruction that
+ * wants to write the next value into the same register component (if there
+ * is one) has one dependency fewer.
+ */
+static void commit_update_reads(struct schedule_state * s,
+                                       struct schedule_instruction * sinst)
+{
+       unsigned int idx;
+
+       for (idx = 0; idx < sinst->NumReadValues; idx++) {
+               struct reg_value * val = sinst->ReadValues[idx];
+
+               assert(val->NumReaders > 0);
+               val->NumReaders -= 1;
+
+               /* Still being read elsewhere, or nobody is waiting to
+                * overwrite it. */
+               if (val->NumReaders != 0 || !val->Next)
+                       continue;
+
+               decrease_dependencies(s, val->Next->Writer);
+       }
+}
+
+/**
+ * Account for sinst having produced each of the values it writes: every
+ * recorded reader of such a value has one dependency fewer.
+ */
+static void commit_update_writes(struct schedule_state * s,
+                                       struct schedule_instruction * sinst)
+{
+       unsigned int idx = 0;
+
+       while (idx < sinst->NumWriteValues) {
+               struct reg_value * val = sinst->WriteValues[idx++];
+               struct reg_value_reader * rd;
+
+               if (!val->NumReaders) {
+                       /* No readers at all.  This happens in instruction
+                        * sequences of the type
+                        *  OP r.x, ...;
+                        *  OP r.x, r.x, ...;
+                        * See also the subtlety in how instructions that both
+                        * read and write the same register are scanned.
+                        * The next writer only had to wait for this write.
+                        */
+                       if (val->Next)
+                               decrease_dependencies(s, val->Next->Writer);
+                       continue;
+               }
+
+               for (rd = val->Readers; rd; rd = rd->Next)
+                       decrease_dependencies(s, rd->Reader);
+       }
+}
+
+static void commit_alu_instruction(struct schedule_state * s, struct schedule_instruction * sinst)
+{      /* Commit an ALU instruction: settle the values it reads first, then the values it writes. */
+       DBG("%i: commit\n", sinst->Instruction->IP);
+
+       commit_update_reads(s, sinst);
+
+       commit_update_writes(s, sinst);
+}
+
+/**
+ * Emit every ready texture instruction as one block in front of before.
+ *
+ * A single block lets the hardware (hopefully) sample many textures in
+ * parallel and avoids hardware indirections on R300.
+ */
+static void emit_all_tex(struct schedule_state * s, struct rc_instruction * before)
+{
+       struct schedule_instruction * tex;
+       struct rc_instruction * marker;
+
+       assert(s->ReadyTEX);
+
+       /* Node marker for R300 */
+       marker = rc_insert_new_instruction(s->C, before->Prev);
+       marker->U.I.Opcode = RC_OPCODE_BEGIN_TEX;
+
+       /* First pass: link the texture instructions back in and commit
+        * their reads.
+        *
+        * All of the TEX instructions in the same TEX block have their
+        * source registers read from before any of the instructions in that
+        * block write to their destination registers.  So committing the
+        * reads of one TEX instruction can make another TEX instruction,
+        * one that wants to write one of those source registers, ready; it
+        * is appended to the list being walked and ends up in this same
+        * block.  This prevents the following sequence from being emitted
+        * in two different TEX blocks:
+        * 0: TEX temp[0].xyz, temp[1].xy__, 2D[0];
+        * 1: TEX temp[1].xyz, temp[2].xy__, 2D[0];
+        */
+       for (tex = s->ReadyTEX; tex; tex = tex->NextReady) {
+               rc_insert_instruction(before->Prev, tex->Instruction);
+               DBG("%i: commit TEX reads\n", tex->Instruction->IP);
+               commit_update_reads(s, tex);
+       }
+
+       /* Second pass: detach the list, then commit the writes. */
+       tex = s->ReadyTEX;
+       s->ReadyTEX = 0;
+       for (; tex; tex = tex->NextReady) {
+               DBG("%i: commit TEX writes\n", tex->Instruction->IP);
+               commit_update_writes(s, tex);
+       }
+}
+
+/* This is a helper function for destructive_merge_instructions().  It helps
+ * merge presubtract sources from two instructions and makes sure the
+ * presubtract sources end up in the correct spot.  This function assumes that
+ * dst_full is an rgb instruction, meaning that it has a vector instruction(rgb)
+ * but no scalar instruction (alpha).
+ *
+ * @param dst_full Instruction that receives the sources.  It is modified in
+ * place, and may be left partially modified when 0 is returned.
+ * @param src Sub-instruction (passed by value) whose presubtract source
+ * registers are merged into dst_full.
+ * @param type RC_SOURCE_RGB or RC_SOURCE_ALPHA, selecting which source bank
+ * of dst_full is used; any other value asserts and returns 0.
+ * @return 0 if merging the presubtract sources fails.
+ * @return 1 if merging the presubtract sources succeeds.
+ */
+static int merge_presub_sources(
+       struct rc_pair_instruction * dst_full,
+       struct rc_pair_sub_instruction src,
+       unsigned int type)
+{
+       unsigned int srcp_src, srcp_regs, is_rgb, is_alpha;
+       struct rc_pair_sub_instruction * dst_sub;
+       const struct rc_opcode_info * info;
+
+       assert(dst_full->Alpha.Opcode == RC_OPCODE_NOP);
+
+       switch(type) {
+       case RC_SOURCE_RGB:
+               is_rgb = 1;
+               is_alpha = 0;
+               dst_sub = &dst_full->RGB;
+               break;
+       case RC_SOURCE_ALPHA:
+               is_rgb = 0;
+               is_alpha = 1;
+               dst_sub = &dst_full->Alpha;
+               break;
+       default:
+               assert(0);
+               return 0;
+       }
+
+       info = rc_get_opcode_info(dst_full->RGB.Opcode);
+
+       if (dst_sub->Src[RC_PAIR_PRESUB_SRC].Used)
+               return 0; /* the destination bank already has a presubtract source in use */
+
+       srcp_regs = rc_presubtract_src_reg_count(
+                                       src.Src[RC_PAIR_PRESUB_SRC].Index);
+       for(srcp_src = 0; srcp_src < srcp_regs; srcp_src++) {
+               unsigned int arg;
+               int free_source;
+               unsigned int one_way = 0; /* set when the displaced source was re-homed below; args are then not swapped back */
+               struct rc_pair_instruction_source srcp = src.Src[srcp_src];
+               struct rc_pair_instruction_source temp;
+
+               free_source = rc_pair_alloc_source(dst_full, is_rgb, is_alpha,
+                                                       srcp.File, srcp.Index);
+
+               /* If free_source < 0 then there are no free source
+                * slots. */
+               if (free_source < 0)
+                       return 0;
+
+               temp = dst_sub->Src[srcp_src];
+               dst_sub->Src[srcp_src] = dst_sub->Src[free_source];
+
+               /* srcp needs src0 and src1 to be the same */
+               if (free_source < srcp_src) {
+                       if (!temp.Used)
+                               continue;
+                       free_source = rc_pair_alloc_source(dst_full, is_rgb,
+                                       is_alpha, temp.File, temp.Index);
+                       if (free_source < 0)
+                               return 0;
+                       one_way = 1;
+               } else {
+                       dst_sub->Src[free_source] = temp;
+               }
+
+               /* If free_source == srcp_src, then the presubtract
+                * source is already in the correct place. */
+               if (free_source == srcp_src)
+                       continue;
+
+               /* Shuffle the sources, so we can put the
+                * presubtract source in the correct place. */
+               for(arg = 0; arg < info->NumSrcRegs; arg++) {
+                       /*If this arg does not read from an rgb source,
+                        * do nothing. */
+                       if (!(rc_source_type_swz(dst_full->RGB.Arg[arg].Swizzle)
+                                                               & type)) {
+                               continue;
+                       }
+
+                       if (dst_full->RGB.Arg[arg].Source == srcp_src)
+                               dst_full->RGB.Arg[arg].Source = free_source;
+                       /* We need to do this just in case register
+                        * is one of the sources already, but in the
+                        * wrong spot. */
+                       else if(dst_full->RGB.Arg[arg].Source == free_source
+                                                       && !one_way) {
+                               dst_full->RGB.Arg[arg].Source = srcp_src;
+                       }
+               }
+       }
+       return 1;
+}
+
+
+/* This function assumes that rgb.Alpha and alpha.RGB are unused
+ *
+ * It folds the alpha operation of `alpha` into the free alpha half of `rgb`:
+ * presubtract sources first, then the alpha arguments, then the alpha
+ * opcode/destination fields and finally the ALU result settings.
+ *
+ * "Destructive" because rgb is modified as the merge proceeds and is NOT
+ * restored when the merge fails; merge_instructions() keeps a backup for
+ * that purpose.
+ *
+ * Returns 1 on success and 0 on failure.
+ */
+static int destructive_merge_instructions(
+               struct rc_pair_instruction * rgb,
+               struct rc_pair_instruction * alpha)
+{
+       const struct rc_opcode_info * opcode;
+
+       assert(rgb->Alpha.Opcode == RC_OPCODE_NOP);
+       assert(alpha->RGB.Opcode == RC_OPCODE_NOP);
+
+       /* Presubtract registers need to be merged first so that registers
+        * needed by the presubtract operation can be placed in src0 and/or
+        * src1. */
+
+       /* Merge the rgb presubtract registers. */
+       if (alpha->RGB.Src[RC_PAIR_PRESUB_SRC].Used) {
+               if (!merge_presub_sources(rgb, alpha->RGB, RC_SOURCE_RGB)) {
+                       return 0;
+               }
+       }
+       /* Merge the alpha presubtract registers */
+       if (alpha->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) {
+               if(!merge_presub_sources(rgb,  alpha->Alpha, RC_SOURCE_ALPHA)){
+                       return 0;
+               }
+       }
+
+       /* Copy alpha args into rgb */
+       opcode = rc_get_opcode_info(alpha->Alpha.Opcode);
+
+       for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) {
+               unsigned int srcrgb = 0;
+               unsigned int srcalpha = 0;
+               unsigned int oldsrc = alpha->Alpha.Arg[arg].Source;
+               rc_register_file file = 0;
+               unsigned int index = 0;
+               int source;
+
+               if (GET_SWZ(alpha->Alpha.Arg[arg].Swizzle, 0) < 3) { /* the argument reads through an RGB source slot */
+                       srcrgb = 1;
+                       file = alpha->RGB.Src[oldsrc].File;
+                       index = alpha->RGB.Src[oldsrc].Index;
+               } else if (GET_SWZ(alpha->Alpha.Arg[arg].Swizzle, 0) < 4) { /* the argument reads through an alpha source slot */
+                       srcalpha = 1;
+                       file = alpha->Alpha.Src[oldsrc].File;
+                       index = alpha->Alpha.Src[oldsrc].Index;
+               } /* any other swizzle: srcrgb, srcalpha, file and index all stay 0 */
+
+               source = rc_pair_alloc_source(rgb, srcrgb, srcalpha, file, index);
+               if (source < 0)
+                       return 0;
+
+               rgb->Alpha.Arg[arg].Source = source;
+               rgb->Alpha.Arg[arg].Swizzle = alpha->Alpha.Arg[arg].Swizzle;
+               rgb->Alpha.Arg[arg].Abs = alpha->Alpha.Arg[arg].Abs;
+               rgb->Alpha.Arg[arg].Negate = alpha->Alpha.Arg[arg].Negate;
+       }
+
+       /* Copy alpha opcode into rgb */
+       rgb->Alpha.Opcode = alpha->Alpha.Opcode;
+       rgb->Alpha.DestIndex = alpha->Alpha.DestIndex;
+       rgb->Alpha.WriteMask = alpha->Alpha.WriteMask;
+       rgb->Alpha.OutputWriteMask = alpha->Alpha.OutputWriteMask;
+       rgb->Alpha.DepthWriteMask = alpha->Alpha.DepthWriteMask;
+       rgb->Alpha.Saturate = alpha->Alpha.Saturate;
+
+       /* Merge ALU result writing */
+       if (alpha->WriteALUResult) {
+               if (rgb->WriteALUResult)
+                       return 0; /* both halves want the single ALU result */
+
+               rgb->WriteALUResult = alpha->WriteALUResult;
+               rgb->ALUResultCompare = alpha->ALUResultCompare;
+       }
+
+       return 1;
+}
+
+/**
+ * Try to fold the alpha instruction into the rgb instruction.
+ *
+ * Returns true on success.  On failure returns false and leaves rgb exactly
+ * as it was on entry; alpha is never modified.
+ */
+static int merge_instructions(struct rc_pair_instruction * rgb, struct rc_pair_instruction * alpha)
+{
+       struct rc_pair_instruction saved;
+       int output_vs_aluresult;
+
+       /* Instructions can't write output registers and ALU result at the
+        * same time. */
+       output_vs_aluresult =
+               (rgb->WriteALUResult && alpha->Alpha.OutputWriteMask)
+               || (rgb->RGB.OutputWriteMask && alpha->WriteALUResult);
+       if (output_vs_aluresult)
+               return 0;
+
+       /* The merge itself scribbles over rgb even when it fails, so keep a
+        * copy to roll back to. */
+       memcpy(&saved, rgb, sizeof(struct rc_pair_instruction));
+
+       if (!destructive_merge_instructions(rgb, alpha)) {
+               memcpy(rgb, &saved, sizeof(struct rc_pair_instruction));
+               return 0;
+       }
+
+       return 1;
+}
+
+/**
+ * Set the Nop flag on the instruction preceding emitted when emitted uses a
+ * presubtract operation whose source temporaries were written by that
+ * preceding instruction.
+ *
+ * Nothing is done when the preceding instruction is not a pair (ALU)
+ * instruction.
+ */
+static void presub_nop(struct rc_instruction * emitted) {
+       int prev_rgb_index, prev_alpha_index, i, num_src;
+
+       /* We don't need a nop if the previous instruction is a TEX. */
+       if (emitted->Prev->Type != RC_INSTRUCTION_PAIR) {
+               return;
+       }
+
+       /* -1 means "this half of the previous instruction writes nothing".
+        * Compared against the unsigned source indices below, it can never
+        * match a real register. */
+       if (emitted->Prev->U.P.RGB.WriteMask)
+               prev_rgb_index = emitted->Prev->U.P.RGB.DestIndex;
+       else
+               prev_rgb_index = -1;
+       if (emitted->Prev->U.P.Alpha.WriteMask)
+               prev_alpha_index = emitted->Prev->U.P.Alpha.DestIndex;
+       else
+               /* Was 1, which made temporary register 1 look as if it had
+                * been written by the previous instruction. */
+               prev_alpha_index = -1;
+
+       /* Check the rgb presubtract sources of the emitted instruction. */
+       if (emitted->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Used) {
+               num_src = rc_presubtract_src_reg_count(
+                               emitted->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Index);
+               for (i = 0; i < num_src; i++) {
+                       unsigned int index = emitted->U.P.RGB.Src[i].Index;
+                       if (emitted->U.P.RGB.Src[i].File == RC_FILE_TEMPORARY
+                           && (index  == prev_rgb_index
+                               || index == prev_alpha_index)) {
+                               emitted->Prev->U.P.Nop = 1;
+                               return;
+                       }
+               }
+       }
+
+       /* Check the alpha presubtract sources of the emitted instruction. */
+       if (!emitted->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Used)
+               return;
+
+       num_src = rc_presubtract_src_reg_count(
+                               emitted->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Index);
+       for (i = 0; i < num_src; i++) {
+               unsigned int index = emitted->U.P.Alpha.Src[i].Index;
+               if(emitted->U.P.Alpha.Src[i].File == RC_FILE_TEMPORARY
+                  && (index == prev_rgb_index || index == prev_alpha_index)) {
+                       emitted->Prev->U.P.Nop = 1;
+                       return;
+               }
+       }
+}
+
+/**
+ * Redirect one reader argument from an rgb channel to the alpha channel.
+ *
+ * Every one of the first three swizzle slots of arg that selects old_swz is
+ * switched to W, and the argument is pointed at an alpha source for
+ * (old_file, new_index) in inst.
+ */
+static void rgb_to_alpha_remap (
+       struct rc_instruction * inst,
+       struct rc_pair_instruction_arg * arg,
+       rc_register_file old_file,
+       rc_swizzle old_swz,
+       unsigned int new_index)
+{
+       unsigned int chan;
+       int alpha_src;
+
+       for (chan = 0; chan < 3; chan++) {
+               if (get_swz(arg->Swizzle, chan) != old_swz)
+                       continue;
+               SET_SWZ(arg->Swizzle, chan, RC_SWIZZLE_W);
+       }
+
+       alpha_src = rc_pair_alloc_source(&inst->U.P, 0, 1,
+                                                       old_file, new_index);
+       if (alpha_src >= 0) {
+               arg->Source = alpha_src;
+               return;
+       }
+
+       /* This conversion is not possible, we must have made a mistake in
+        * is_rgb_to_alpha_possible. */
+       assert(0);
+}
+
+/**
+ * Returns 0 for the derivative opcodes DDX and DDY, 1 for every other
+ * opcode.  is_rgb_to_alpha_possible() uses this to veto remapping a reader.
+ */
+static int can_remap(unsigned int opcode)
+{
+       if (opcode == RC_OPCODE_DDX || opcode == RC_OPCODE_DDY)
+               return 0;
+
+       return 1;
+}
+
+/**
+ * Returns 0 for opcodes that must not be moved from the rgb half to the
+ * alpha half (the derivatives and the dot products), 1 otherwise.
+ */
+static int can_convert_opcode_to_alpha(unsigned int opcode)
+{
+       /* Derivatives. */
+       if (opcode == RC_OPCODE_DDX || opcode == RC_OPCODE_DDY)
+               return 0;
+
+       /* Dot products. */
+       if (opcode == RC_OPCODE_DP2 || opcode == RC_OPCODE_DP3
+           || opcode == RC_OPCODE_DP4 || opcode == RC_OPCODE_DPH)
+               return 0;
+
+       return 1;
+}
+
+/**
+ * Reader callback: decide whether one reader could cope with the value it
+ * reads being moved from an rgb channel to the alpha channel.
+ *
+ * userdata is the struct rc_reader_data being collected; its Abort flag is
+ * set as soon as a reader is found that rules the conversion out.
+ */
+static void is_rgb_to_alpha_possible(
+       void * userdata,
+       struct rc_instruction * inst,
+       struct rc_pair_instruction_arg * arg,
+       struct rc_pair_instruction_source * src)
+{
+       struct rc_reader_data * readers = userdata;
+       unsigned int read_chans = 0;
+       unsigned int used_alpha_srcs = 0;
+       unsigned int n;
+
+       if (!(can_remap(inst->U.P.RGB.Opcode)
+             && can_remap(inst->U.P.Alpha.Opcode)))
+               goto reject;
+
+       if (!src)
+               return;
+
+       /* XXX There are some cases where we can still do the conversion if
+        * a reader reads from a presubtract source, but for now we'll prevent
+        * it. */
+       if (arg->Source == RC_PAIR_PRESUB_SRC)
+               goto reject;
+
+       /* Make sure the source only reads from one component.
+        * XXX We should allow the source to read from the same component twice.
+        * XXX If the index we will be converting to is the same as the
+        * current index, then it is OK to read from more than one component.
+        */
+       for (n = 0; n < 3; n++) {
+               rc_swizzle swz = get_swz(arg->Swizzle, n);
+
+               if (swz == RC_SWIZZLE_X || swz == RC_SWIZZLE_Y
+                   || swz == RC_SWIZZLE_Z || swz == RC_SWIZZLE_W)
+                       read_chans++;
+       }
+       if (read_chans > 1)
+               goto reject;
+
+       /* Make sure there are enough alpha sources.
+        * XXX If we know what register all the readers are going
+        * to be remapped to, then in some situations we can still do
+        * the subsitution, even if all 3 alpha sources are being used.*/
+       for (n = 0; n < 3; n++) {
+               if (inst->U.P.Alpha.Src[n].Used)
+                       used_alpha_srcs++;
+       }
+       if (used_alpha_srcs > 2)
+               goto reject;
+
+       return;
+
+reject:
+       readers->Abort = 1;
+}
+
+/**
+ * Try to turn an rgb-only instruction that writes a single channel into an
+ * alpha-only instruction, so that it can later be paired with another rgb
+ * instruction.
+ *
+ * The result is moved to the W channel of the first temporary, searching
+ * upwards from the old destination index, whose W channel has no tracked
+ * value.  The scheduler's value table is updated and every reader listed in
+ * sched_inst->GlobalReaders is rewritten to read the new location.
+ *
+ * @return 1 if the instruction was converted, 0 if it was left untouched.
+ */
+static int convert_rgb_to_alpha(
+       struct schedule_state * s,
+       struct schedule_instruction * sched_inst)
+{
+       struct rc_pair_instruction * pair_inst = &sched_inst->Instruction->U.P;
+       unsigned int old_mask = pair_inst->RGB.WriteMask;
+       unsigned int old_swz = rc_mask_to_swizzle(old_mask);
+       const struct rc_opcode_info * info =
+                               rc_get_opcode_info(pair_inst->RGB.Opcode);
+       int new_index = -1;
+       unsigned int i;
+
+       /* Some reader was found that cannot be remapped. */
+       if (sched_inst->GlobalReaders.Abort)
+               return 0;
+
+       if (!pair_inst->RGB.WriteMask)
+               return 0;
+
+       if (!can_convert_opcode_to_alpha(pair_inst->RGB.Opcode)
+           || !can_convert_opcode_to_alpha(pair_inst->Alpha.Opcode)) {
+               return 0;
+       }
+
+       assert(sched_inst->NumWriteValues == 1);
+
+       if (!sched_inst->WriteValues[0]) {
+               assert(0);
+               return 0;
+       }
+
+       /* We start at the old index, because if we can reuse the same
+        * register and just change the swizzle then it is more likely we
+        * will be able to convert all the readers. */
+       for (i = pair_inst->RGB.DestIndex; i < RC_REGISTER_MAX_INDEX; i++) {
+               struct reg_value ** new_regvalp = get_reg_valuep(
+                                               s, RC_FILE_TEMPORARY, i, 3);
+               struct reg_value ** old_regvalp;
+
+               if (*new_regvalp)
+                       continue;
+
+               /* Free W channel found: move the tracked value from the old
+                * rgb channel into it. */
+               old_regvalp = get_reg_valuep(s, RC_FILE_TEMPORARY,
+                                       pair_inst->RGB.DestIndex, old_swz);
+               new_index = i;
+               *new_regvalp = *old_regvalp;
+               *old_regvalp = NULL;
+               break;
+       }
+       if (new_index < 0) {
+               return 0;
+       }
+
+       /* Move the operation into the alpha half... */
+       pair_inst->Alpha.Opcode = pair_inst->RGB.Opcode;
+       pair_inst->Alpha.DestIndex = new_index;
+       pair_inst->Alpha.WriteMask = RC_MASK_W;
+       pair_inst->Alpha.Target = pair_inst->RGB.Target;
+       pair_inst->Alpha.OutputWriteMask = pair_inst->RGB.OutputWriteMask;
+       pair_inst->Alpha.DepthWriteMask = pair_inst->RGB.DepthWriteMask;
+       pair_inst->Alpha.Saturate = pair_inst->RGB.Saturate;
+       memcpy(pair_inst->Alpha.Arg, pair_inst->RGB.Arg,
+                                               sizeof(pair_inst->Alpha.Arg));
+       /* Move the swizzles into the first chan */
+       for (i = 0; i < info->NumSrcRegs; i++) {
+               unsigned int j;
+               for (j = 0; j < 3; j++) {
+                       unsigned int swz = get_swz(pair_inst->Alpha.Arg[i].Swizzle, j);
+                       if (swz != RC_SWIZZLE_UNUSED) {
+                               pair_inst->Alpha.Arg[i].Swizzle =
+                                                       rc_init_swizzle(swz, 1);
+                               break;
+                       }
+               }
+       }
+       /* ...and blank out the rgb half. */
+       pair_inst->RGB.Opcode = RC_OPCODE_NOP;
+       pair_inst->RGB.DestIndex = 0;
+       pair_inst->RGB.WriteMask = 0;
+       pair_inst->RGB.Target = 0;
+       pair_inst->RGB.OutputWriteMask = 0;
+       pair_inst->RGB.DepthWriteMask = 0;
+       pair_inst->RGB.Saturate = 0;
+       memset(pair_inst->RGB.Arg, 0, sizeof(pair_inst->RGB.Arg));
+
+       /* Point every reader at the new register / W channel. */
+       for(i = 0; i < sched_inst->GlobalReaders.ReaderCount; i++) {
+               struct rc_reader reader = sched_inst->GlobalReaders.Readers[i];
+               rgb_to_alpha_remap(reader.Inst, reader.U.P.Arg,
+                                       RC_FILE_TEMPORARY, old_swz, new_index);
+       }
+       return 1;
+}
+
+/**
+ * Find a good ALU instruction or pair of ALU instruction and emit it.
+ *
+ * Prefer emitting full ALU instructions, so that when we reach a point
+ * where no full ALU instruction can be emitted, we have more candidates
+ * for RGB/Alpha pairing.
+ *
+ * Order of attempts when no full ALU instruction is ready:
+ *  1. merge some ready RGB instruction with some ready alpha instruction;
+ *  2. convert a ready RGB instruction into an alpha instruction and retry 1;
+ *  3. emit the first ready RGB, or else the first ready alpha, on its own.
+ *
+ * The instruction is inserted right in front of before.  If no ALU
+ * instruction is ready at all (a caller bug), nothing is emitted.
+ */
+static void emit_one_alu(struct schedule_state *s, struct rc_instruction * before)
+{
+       struct schedule_instruction * sinst;
+
+       if (s->ReadyFullALU) {
+               sinst = s->ReadyFullALU;
+               s->ReadyFullALU = s->ReadyFullALU->NextReady;
+               rc_insert_instruction(before->Prev, sinst->Instruction);
+               commit_alu_instruction(s, sinst);
+       } else {
+               struct schedule_instruction **prgb;
+               struct schedule_instruction **palpha;
+               struct schedule_instruction *prev;
+pair:
+               /* Some pairings might fail because they require too
+                * many source slots; try all possible pairings if necessary */
+               for(prgb = &s->ReadyRGB; *prgb; prgb = &(*prgb)->NextReady) {
+                       for(palpha = &s->ReadyAlpha; *palpha; palpha = &(*palpha)->NextReady) {
+                               struct schedule_instruction * psirgb = *prgb;
+                               struct schedule_instruction * psialpha = *palpha;
+
+                               if (!merge_instructions(&psirgb->Instruction->U.P, &psialpha->Instruction->U.P))
+                                       continue;
+
+                               /* Unlink both halves from their ready lists;
+                                * the merged instruction lives in psirgb. */
+                               *prgb = (*prgb)->NextReady;
+                               *palpha = (*palpha)->NextReady;
+                               rc_insert_instruction(before->Prev, psirgb->Instruction);
+                               commit_alu_instruction(s, psirgb);
+                               commit_alu_instruction(s, psialpha);
+                               goto success;
+                       }
+               }
+               prev = NULL;
+               /* No success in pairing, now try to convert one of the RGB
+                * instructions to an Alpha so we can pair it with another RGB.
+                */
+               if (s->ReadyRGB && s->ReadyRGB->NextReady) {
+                       for(prgb = &s->ReadyRGB; *prgb; prgb = &(*prgb)->NextReady) {
+                               if ((*prgb)->NumWriteValues == 1) {
+                                       struct schedule_instruction * prgb_next;
+                                       if (!convert_rgb_to_alpha(s, *prgb))
+                                               goto cont_loop;
+                                       prgb_next = (*prgb)->NextReady;
+                                       /* Add instruction to the Alpha ready list. */
+                                       (*prgb)->NextReady = s->ReadyAlpha;
+                                       s->ReadyAlpha = *prgb;
+                                       /* Remove instruction from the RGB ready list.*/
+                                       if (prev)
+                                               prev->NextReady = prgb_next;
+                                       else
+                                               s->ReadyRGB = prgb_next;
+                                       goto pair;
+                               }
+cont_loop:
+                               prev = *prgb;
+                       }
+               }
+               /* Still no success in pairing, just take the first RGB
+                * or alpha instruction. */
+               if (s->ReadyRGB) {
+                       sinst = s->ReadyRGB;
+                       s->ReadyRGB = s->ReadyRGB->NextReady;
+               } else if (s->ReadyAlpha) {
+                       sinst = s->ReadyAlpha;
+                       s->ReadyAlpha = s->ReadyAlpha->NextReady;
+               } else {
+                       /*XXX Something real bad has happened. */
+                       assert(0);
+                       /* With assertions compiled out, bail out instead of
+                        * dereferencing the uninitialized sinst below. */
+                       return;
+               }
+
+               rc_insert_instruction(before->Prev, sinst->Instruction);
+               commit_alu_instruction(s, sinst);
+       success: ;
+       }
+       /* If the instruction we just emitted uses a presubtract value, and
+        * the presubtract sources were written by the previous intstruction,
+        * the previous instruction needs a nop. */
+       presub_nop(before->Prev);
+}
+
+/**
+ * Per-channel read callback (passed to rc_for_all_reads_chan) that records
+ * that s->Current reads one channel of a register.
+ *
+ * A reg_value_reader node for s->Current is pushed onto the value's reader
+ * list, the value's NumReaders is incremented and the value is appended to
+ * s->Current->ReadValues. If the value already has a writer in this block,
+ * s->Current gains one dependency.
+ *
+ * \param data  the struct schedule_state being filled in
+ * \param inst  not used here; the instruction is taken from s->Current
+ * \param file, index, chan  the register channel that is read
+ */
+static void scan_read(void * data, struct rc_instruction * inst,
+               rc_register_file file, unsigned int index, unsigned int chan)
+{
+       struct schedule_state * s = data;
+       struct reg_value ** v = get_reg_valuep(s, file, index, chan);
+       struct reg_value_reader * reader;
+
+       if (!v)
+               return;
+
+       if (*v && (*v)->Writer == s->Current) {
+               /* The instruction reads and writes to a register component.
+                * In this case, we only want to increment dependencies by one.
+                */
+               return;
+       }
+
+       DBG("%i: read %i[%i] chan %i\n", s->Current->Instruction->IP, file, index, chan);
+
+       reader = memory_pool_malloc(&s->C->Pool, sizeof(*reader));
+       /* Zero the node like every other pool allocation in this file, so
+        * that reader->Next is a proper list terminator when this is the
+        * first reader of a fresh value (the branch below never sets it). */
+       memset(reader, 0, sizeof(*reader));
+       reader->Reader = s->Current;
+       if (!*v) {
+               /* In this situation, the instruction reads from a register
+                * that hasn't been written to or read from in the current
+                * block. */
+               *v = memory_pool_malloc(&s->C->Pool, sizeof(struct reg_value));
+               memset(*v, 0, sizeof(struct reg_value));
+               (*v)->Readers = reader;
+       } else {
+               reader->Next = (*v)->Readers;
+               (*v)->Readers = reader;
+               /* Only update the current instruction's dependencies if the
+                * register it reads from has been written to in this block. */
+               if ((*v)->Writer) {
+                       s->Current->NumDependencies++;
+               }
+       }
+       (*v)->NumReaders++;
+
+       /* 12 is the capacity this code assumes for ReadValues. */
+       if (s->Current->NumReadValues >= 12) {
+               rc_error(s->C, "%s: NumReadValues overflow\n", __FUNCTION__);
+       } else {
+               s->Current->ReadValues[s->Current->NumReadValues++] = *v;
+       }
+}
+
+/**
+ * Per-channel write callback (passed to rc_for_all_writes_chan).
+ *
+ * Creates a new reg_value whose Writer is s->Current and makes it the
+ * current value of the register channel. If the channel already had a
+ * value, the new one is chained behind it through Next and s->Current
+ * gains one dependency.
+ *
+ * \param data  the struct schedule_state being filled in
+ * \param inst  not used here; the instruction is taken from s->Current
+ */
+static void scan_write(void * data, struct rc_instruction * inst,
+               rc_register_file file, unsigned int index, unsigned int chan)
+{
+       struct schedule_state * s = data;
+       struct reg_value ** slot = get_reg_valuep(s, file, index, chan);
+       struct reg_value * fresh;
+
+       if (slot == NULL)
+               return;
+
+       DBG("%i: write %i[%i] chan %i\n", s->Current->Instruction->IP, file, index, chan);
+
+       fresh = memory_pool_malloc(&s->C->Pool, sizeof(struct reg_value));
+       memset(fresh, 0, sizeof(struct reg_value));
+       fresh->Writer = s->Current;
+
+       /* An older value in this slot must be consumed first. */
+       if (*slot != NULL) {
+               (*slot)->Next = fresh;
+               s->Current->NumDependencies += 1;
+       }
+       *slot = fresh;
+
+       /* 4 is the capacity this code assumes for WriteValues. */
+       if (s->Current->NumWriteValues < 4) {
+               s->Current->WriteValues[s->Current->NumWriteValues] = fresh;
+               s->Current->NumWriteValues++;
+       } else {
+               rc_error(s->C, "%s: NumWriteValues overflow\n", __FUNCTION__);
+       }
+}
+
+/**
+ * Reader callback handed to rc_get_readers() for the "normal instruction"
+ * slot: it unconditionally sets Abort on the rc_reader_data passed as
+ * userdata. The inst and src arguments are ignored.
+ */
+static void is_rgb_to_alpha_possible_normal(
+       void * userdata,
+       struct rc_instruction * inst,
+       struct rc_src_register * src)
+{
+       ((struct rc_reader_data *)userdata)->Abort = 1;
+}
+
+/**
+ * Schedule the instructions in the half-open range [begin, end).
+ *
+ * Works in three steps: build one schedule_instruction per instruction and
+ * record its register reads/writes, unlink the whole range from the
+ * program list, then re-insert instructions in front of 'end' for as long
+ * as any ready list is non-empty and no compiler error has been raised.
+ *
+ * \param c      compiler; c->Base supplies the memory pool and error flag
+ * \param begin  first instruction of the block
+ * \param end    instruction following the block (not scheduled itself)
+ */
+static void schedule_block(struct r300_fragment_program_compiler * c,
+               struct rc_instruction * begin, struct rc_instruction * end)
+{
+       struct schedule_state s;
+       unsigned int ip;
+
+       memset(&s, 0, sizeof(s));
+       s.C = &c->Base;
+
+       /* Scan instructions for data dependencies */
+       ip = 0;
+       for(struct rc_instruction * inst = begin; inst != end; inst = inst->Next) {
+               s.Current = memory_pool_malloc(&c->Base.Pool, sizeof(*s.Current));
+               memset(s.Current, 0, sizeof(struct schedule_instruction));
+
+               s.Current->Instruction = inst;
+               /* IPs are renumbered from 0 within this block. */
+               inst->IP = ip++;
+
+               DBG("%i: Scanning\n", inst->IP);
+
+               /* The order of things here is subtle and maybe slightly
+                * counter-intuitive, to account for the case where an
+                * instruction writes to the same register as it reads
+                * from. */
+               rc_for_all_writes_chan(inst, &scan_write, &s);
+               rc_for_all_reads_chan(inst, &scan_read, &s);
+
+               DBG("%i: Has %i dependencies\n", inst->IP, s.Current->NumDependencies);
+
+               /* Nothing to wait for: the instruction can be emitted at once. */
+               if (!s.Current->NumDependencies)
+                       instruction_ready(&s, s.Current);
+
+               /* Get global readers for possible RGB->Alpha conversion. */
+               s.Current->GlobalReaders.ExitOnAbort = 1;
+               rc_get_readers(s.C, inst, &s.Current->GlobalReaders,
+                               is_rgb_to_alpha_possible_normal,
+                               is_rgb_to_alpha_possible, NULL);
+       }
+
+       /* Temporarily unlink all instructions */
+       begin->Prev->Next = end;
+       end->Prev = begin->Prev;
+
+       /* Schedule instructions back */
+       while(!s.C->Error &&
+             (s.ReadyTEX || s.ReadyRGB || s.ReadyAlpha || s.ReadyFullALU)) {
+               /* All ready TEX instructions are emitted first, then ALU
+                * instructions one at a time until none are ready. */
+               if (s.ReadyTEX)
+                       emit_all_tex(&s, end);
+
+               while(!s.C->Error && (s.ReadyFullALU || s.ReadyRGB || s.ReadyAlpha))
+                       emit_one_alu(&s, end);
+       }
+}
+
+/**
+ * Return the IsFlowControl flag of the opcode of a normal instruction,
+ * and 0 for any instruction whose Type is not RC_INSTRUCTION_NORMAL.
+ */
+static int is_controlflow(struct rc_instruction * inst)
+{
+       if (inst->Type != RC_INSTRUCTION_NORMAL)
+               return 0;
+
+       return rc_get_opcode_info(inst->U.I.Opcode)->IsFlowControl;
+}
+
+/**
+ * Compiler pass: schedule every block of the program.
+ *
+ * The instruction list is cut into maximal runs of instructions for which
+ * is_controlflow() is false; each run is handed to schedule_block().
+ * Flow-control instructions themselves are skipped and act as block
+ * boundaries.
+ *
+ * \param cc    compiler, cast to struct r300_fragment_program_compiler
+ * \param user  unused
+ */
+void rc_pair_schedule(struct radeon_compiler *cc, void *user)
+{
+       struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc;
+       struct rc_instruction * inst = c->Base.Program.Instructions.Next;
+
+       while(inst != &c->Base.Program.Instructions) {
+               struct rc_instruction * first;
+
+               if (is_controlflow(inst)) {
+                       inst = inst->Next;
+                       continue;
+               }
+
+               first = inst;
+
+               /* Advance to the first flow-control instruction (or the list
+                * head); that is the exclusive end of the block. */
+               while(inst != &c->Base.Program.Instructions && !is_controlflow(inst))
+                       inst = inst->Next;
+
+               DBG("Schedule one block\n");
+               schedule_block(c, first, inst);
+       }
+}
diff --git a/src/gallium/drivers/r300/compiler/radeon_pair_translate.c b/src/gallium/drivers/r300/compiler/radeon_pair_translate.c
new file mode 100644 (file)
index 0000000..2dae56a
--- /dev/null
@@ -0,0 +1,359 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_program_pair.h"
+
+#include "radeon_compiler.h"
+#include "radeon_compiler_util.h"
+
+
+/** Point a source operand at RC_FILE_NONE with the given swizzle. */
+static void set_src_file_none(struct rc_src_register *src, unsigned int swizzle)
+{
+       src->File = RC_FILE_NONE;
+       src->Swizzle = swizzle;
+}
+
+/**
+ * Last rewrite before pair translation: ADD, MOV and MUL are expressed as
+ * MAD, and the first and third operands of CMP are exchanged.
+ * Any other opcode is left untouched.
+ *
+ * The padding operands use RC_FILE_NONE with RC_SWIZZLE_1111 or
+ * RC_SWIZZLE_0000 (presumably the inline constants one and zero --
+ * confirm against the swizzle definitions).
+ */
+static void final_rewrite(struct rc_sub_instruction *inst)
+{
+       struct rc_src_register first_src;
+
+       switch(inst->Opcode) {
+       case RC_OPCODE_MUL:
+               /* MUL a, b  ->  MAD a, b, <0000> */
+               set_src_file_none(&inst->SrcReg[2], RC_SWIZZLE_0000);
+               inst->Opcode = RC_OPCODE_MAD;
+               break;
+       case RC_OPCODE_MOV:
+               /* AMD say we should use CMP.
+                * However, when we transform
+                *  KIL -r0;
+                * into
+                *  CMP tmp, -r0, -r0, 0;
+                *  KIL tmp;
+                * we get incorrect behaviour on R500 when r0 == 0.0.
+                * It appears that the R500 KIL hardware treats -0.0 as less
+                * than zero.
+                */
+               /* MOV a  ->  MAD a, <1111>, <0000> */
+               set_src_file_none(&inst->SrcReg[1], RC_SWIZZLE_1111);
+               set_src_file_none(&inst->SrcReg[2], RC_SWIZZLE_0000);
+               inst->Opcode = RC_OPCODE_MAD;
+               break;
+       case RC_OPCODE_ADD:
+               /* ADD a, b  ->  MAD a, <1111>, b; the old second operand
+                * moves to slot 2 and slot 1 loses its negate bits. */
+               inst->SrcReg[2] = inst->SrcReg[1];
+               set_src_file_none(&inst->SrcReg[1], RC_SWIZZLE_1111);
+               inst->SrcReg[1].Negate = RC_MASK_NONE;
+               inst->Opcode = RC_OPCODE_MAD;
+               break;
+       case RC_OPCODE_CMP:
+               /* Exchange operands 0 and 2. */
+               first_src = inst->SrcReg[0];
+               inst->SrcReg[0] = inst->SrcReg[2];
+               inst->SrcReg[2] = first_src;
+               break;
+       default:
+               /* nothing to do */
+               break;
+       }
+}
+
+
+/**
+ * Work out which ALU halves an instruction occupies.
+ *
+ * \param inst            instruction to classify
+ * \param needrgb         set to 1 if the RGB half is needed, else 0
+ * \param needalpha       set to 1 if the alpha half is needed, else 0
+ * \param istranscendent  set to 1 for COS, EX2, LG2, RCP, RSQ and SIN, else 0
+ */
+static void classify_instruction(struct rc_sub_instruction * inst,
+       int * needrgb, int * needalpha, int * istranscendent)
+{
+       /* Start from the destination write mask. */
+       *needrgb = !!(inst->DstReg.WriteMask & RC_MASK_XYZ);
+       *needalpha = !!(inst->DstReg.WriteMask & RC_MASK_W);
+       *istranscendent = 0;
+
+       /* Writing the ALU result register claims the matching half. */
+       if (inst->WriteALUResult == RC_ALURESULT_X)
+               *needrgb = 1;
+       else if (inst->WriteALUResult == RC_ALURESULT_W)
+               *needalpha = 1;
+
+       switch(inst->Opcode) {
+       case RC_OPCODE_DP3:
+               *needrgb = 1;
+               break;
+       case RC_OPCODE_DP4:
+               *needalpha = 1;
+               *needrgb = 1;
+               break;
+       case RC_OPCODE_COS:
+       case RC_OPCODE_EX2:
+       case RC_OPCODE_LG2:
+       case RC_OPCODE_RCP:
+       case RC_OPCODE_RSQ:
+       case RC_OPCODE_SIN:
+               /* Transcendent opcodes always claim the alpha half. */
+               *istranscendent = 1;
+               *needalpha = 1;
+               break;
+       default:
+               /* ADD, CMP, CND, DDX, DDY, FRC, MAD, MAX, MIN, MOV, MUL and
+                * everything else: the write mask alone decides. */
+               break;
+       }
+}
+
+/**
+ * Report which source channels the swizzle of 'src' selects.
+ *
+ * *rgb is set to 1 if any of the four swizzle slots holds a value below 3,
+ * *alpha is set to 1 if any slot holds exactly 3. Neither output is ever
+ * cleared here, so callers must initialize them.
+ */
+static void src_uses(struct rc_src_register src, unsigned int * rgb,
+                                                       unsigned int * alpha)
+{
+       int slot = 0;
+
+       while (slot < 4) {
+               unsigned int sel = GET_SWZ(src.Swizzle, slot);
+
+               ++slot;
+               if (sel >= 4)
+                       continue;
+
+               if (sel == 3)
+                       *alpha = 1;
+               else
+                       *rgb = 1;
+       }
+}
+
+/**
+ * Fill the given ALU instruction's opcodes and source operands into the given pair,
+ * if possible.
+ *
+ * \param c     compiler; used for error reporting and for the
+ *              OutputDepth / OutputColor output indices
+ * \param pair  destination; it is zeroed first and then filled in
+ * \param inst  source instruction in rc_sub_instruction form
+ *
+ * If rc_pair_alloc_source() returns a negative value for any operand, an
+ * error is raised on the compiler and the function returns with the pair
+ * only partially filled.
+ */
+static void set_pair_instruction(struct r300_fragment_program_compiler *c,
+       struct rc_pair_instruction * pair,
+       struct rc_sub_instruction * inst)
+{
+       int needrgb, needalpha, istranscendent;
+       const struct rc_opcode_info * opcode;
+       int i;
+
+       memset(pair, 0, sizeof(struct rc_pair_instruction));
+
+       classify_instruction(inst, &needrgb, &needalpha, &istranscendent);
+
+       if (needrgb) {
+               /* For a transcendent opcode the RGB half gets
+                * RC_OPCODE_REPL_ALPHA instead of the opcode itself. */
+               if (istranscendent)
+                       pair->RGB.Opcode = RC_OPCODE_REPL_ALPHA;
+               else
+                       pair->RGB.Opcode = inst->Opcode;
+               if (inst->SaturateMode == RC_SATURATE_ZERO_ONE)
+                       pair->RGB.Saturate = 1;
+       }
+       if (needalpha) {
+               pair->Alpha.Opcode = inst->Opcode;
+               if (inst->SaturateMode == RC_SATURATE_ZERO_ONE)
+                       pair->Alpha.Saturate = 1;
+       }
+
+       opcode = rc_get_opcode_info(inst->Opcode);
+
+       /* Presubtract handling:
+        * We need to make sure that the values used by the presubtract
+        * operation end up in src0 or src1. */
+       if(inst->PreSub.Opcode != RC_PRESUB_NONE) {
+               /* rc_pair_alloc_source() will fill in data for
+                * pair->{RGB,ALPHA}.Src[RC_PAIR_PRESUB_SRC] */
+               int j;
+               for(j = 0; j < 3; j++) {
+                       int src_regs;
+                       if(inst->SrcReg[j].File != RC_FILE_PRESUB)
+                               continue;
+
+                       src_regs = rc_presubtract_src_reg_count(
+                                                       inst->PreSub.Opcode);
+                       /* Presubtract operand i is placed in source slot i
+                        * of whichever half reads the presubtract result. */
+                       for(i = 0; i < src_regs; i++) {
+                               unsigned int rgb = 0;
+                               unsigned int alpha = 0;
+                               src_uses(inst->SrcReg[j], &rgb, &alpha);
+                               if(rgb) {
+                                       pair->RGB.Src[i].File =
+                                               inst->PreSub.SrcReg[i].File;
+                                       pair->RGB.Src[i].Index =
+                                               inst->PreSub.SrcReg[i].Index;
+                                       pair->RGB.Src[i].Used = 1;
+                               }
+                               if(alpha) {
+                                       pair->Alpha.Src[i].File =
+                                               inst->PreSub.SrcReg[i].File;
+                                       pair->Alpha.Src[i].Index =
+                                               inst->PreSub.SrcReg[i].Index;
+                                       pair->Alpha.Src[i].Used = 1;
+                               }
+                       }
+               }
+       }
+
+       /* Translate each source operand into an argument of the RGB and/or
+        * alpha half, allocating a pair source slot for it. */
+       for(i = 0; i < opcode->NumSrcRegs; ++i) {
+               int source;
+               if (needrgb && !istranscendent) {
+                       unsigned int srcrgb = 0;
+                       unsigned int srcalpha = 0;
+                       unsigned int srcmask = 0;
+                       int j;
+                       /* We don't care about the alpha channel here.  We only
+                        * want the part of the swizzle that writes to rgb,
+                        * since we are creating an rgb instruction. */
+                       for(j = 0; j < 3; ++j) {
+                               unsigned int swz = GET_SWZ(inst->SrcReg[i].Swizzle, j);
+
+                               if (swz < RC_SWIZZLE_W)
+                                       srcrgb = 1;
+                               else if (swz == RC_SWIZZLE_W)
+                                       srcalpha = 1;
+
+                               if (swz < RC_SWIZZLE_UNUSED)
+                                       srcmask |= 1 << j;
+                       }
+                       source = rc_pair_alloc_source(pair, srcrgb, srcalpha,
+                                                       inst->SrcReg[i].File, inst->SrcReg[i].Index);
+                       if (source < 0) {
+                               rc_error(&c->Base, "Failed to translate "
+                                                       "rgb instruction.\n");
+                               return;
+                       }
+                       pair->RGB.Arg[i].Source = source;
+                       pair->RGB.Arg[i].Swizzle =
+                               rc_init_swizzle(inst->SrcReg[i].Swizzle, 3);
+                       pair->RGB.Arg[i].Abs = inst->SrcReg[i].Abs;
+                       /* The per-channel negate bits collapse to one flag:
+                        * set if any used x/y/z slot is negated. */
+                       pair->RGB.Arg[i].Negate = !!(srcmask & inst->SrcReg[i].Negate & (RC_MASK_X | RC_MASK_Y | RC_MASK_Z));
+               }
+               if (needalpha) {
+                       unsigned int srcrgb = 0;
+                       unsigned int srcalpha = 0;
+                       /* Transcendent opcodes take their input from swizzle
+                        * slot 0, everything else from slot 3. */
+                       unsigned int swz = GET_SWZ(inst->SrcReg[i].Swizzle, istranscendent ? 0 : 3);
+                       if (swz < 3)
+                               srcrgb = 1;
+                       else if (swz < 4)
+                               srcalpha = 1;
+                       source = rc_pair_alloc_source(pair, srcrgb, srcalpha,
+                                                       inst->SrcReg[i].File, inst->SrcReg[i].Index);
+                       if (source < 0) {
+                               rc_error(&c->Base, "Failed to translate "
+                                                       "alpha instruction.\n");
+                               return;
+                       }
+                       pair->Alpha.Arg[i].Source = source;
+                       pair->Alpha.Arg[i].Swizzle = rc_init_swizzle(swz, 1);
+                       pair->Alpha.Arg[i].Abs = inst->SrcReg[i].Abs;
+                       pair->Alpha.Arg[i].Negate = !!(inst->SrcReg[i].Negate & RC_MASK_W);
+               }
+       }
+
+       /* Destination handling */
+       if (inst->DstReg.File == RC_FILE_OUTPUT) {
+        if (inst->DstReg.Index == c->OutputDepth) {
+            pair->Alpha.DepthWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3);
+        } else {
+            /* Look the output index up among the four color outputs;
+             * an index matching none of them sets no write mask. */
+            for (i = 0; i < 4; i++) {
+                if (inst->DstReg.Index == c->OutputColor[i]) {
+                    pair->RGB.Target = i;
+                    pair->Alpha.Target = i;
+                    pair->RGB.OutputWriteMask |=
+                        inst->DstReg.WriteMask & RC_MASK_XYZ;
+                    pair->Alpha.OutputWriteMask |=
+                        GET_BIT(inst->DstReg.WriteMask, 3);
+                    break;
+                }
+            }
+        }
+       } else {
+               if (needrgb) {
+                       pair->RGB.DestIndex = inst->DstReg.Index;
+                       pair->RGB.WriteMask |= inst->DstReg.WriteMask & RC_MASK_XYZ;
+               }
+
+               if (needalpha) {
+                       pair->Alpha.WriteMask |= (GET_BIT(inst->DstReg.WriteMask, 3) << 3);
+                       /* DestIndex is only set when W is actually written. */
+                       if (pair->Alpha.WriteMask) {
+                               pair->Alpha.DestIndex = inst->DstReg.Index;
+                       }
+               }
+       }
+
+       if (inst->WriteALUResult) {
+               pair->WriteALUResult = inst->WriteALUResult;
+               pair->ALUResultCompare = inst->ALUResultCompare;
+       }
+}
+
+
+/**
+ * Raise a compiler error for features this fragment program path rejects:
+ * signed saturation on an instruction that has a destination register,
+ * and relative addressing on any source operand the opcode uses.
+ * Only the first problem found is reported.
+ */
+static void check_opcode_support(struct r300_fragment_program_compiler *c,
+                                struct rc_sub_instruction *inst)
+{
+       const struct rc_opcode_info * info = rc_get_opcode_info(inst->Opcode);
+       unsigned src;
+
+       if (info->HasDstReg && inst->SaturateMode == RC_SATURATE_MINUS_PLUS_ONE) {
+               rc_error(&c->Base, "Fragment program does not support signed Saturate.\n");
+               return;
+       }
+
+       for (src = 0; src < info->NumSrcRegs; src++) {
+               if (!inst->SrcReg[src].RelAddr)
+                       continue;
+
+               rc_error(&c->Base, "Fragment program does not support relative addressing "
+                        " of source operands.\n");
+               return;
+       }
+}
+
+
+/**
+ * Compiler pass: convert every ALU instruction into pair form.
+ *
+ * Instructions that are not RC_INSTRUCTION_NORMAL, texture instructions,
+ * flow-control instructions and KIL are left as they are. No instruction
+ * is added, removed or reordered.
+ *
+ * \param cc    compiler, cast to struct r300_fragment_program_compiler
+ * \param user  unused
+ */
+void rc_pair_translate(struct radeon_compiler *cc, void *user)
+{
+       struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc;
+       struct rc_instruction * cur;
+
+       for(cur = c->Base.Program.Instructions.Next;
+           cur != &c->Base.Program.Instructions;
+           cur = cur->Next) {
+               const struct rc_opcode_info * info;
+               struct rc_sub_instruction scratch;
+
+               if (cur->Type != RC_INSTRUCTION_NORMAL)
+                       continue;
+
+               info = rc_get_opcode_info(cur->U.I.Opcode);
+               if (info->HasTexture || info->IsFlowControl || info->Opcode == RC_OPCODE_KIL)
+                       continue;
+
+               /* Translate from a private copy: set_pair_instruction()
+                * begins by zeroing cur->U.P, which is filled from the
+                * normal-form data taken from cur->U.I. */
+               scratch = cur->U.I;
+
+               check_opcode_support(c, &scratch);
+               final_rewrite(&scratch);
+
+               cur->Type = RC_INSTRUCTION_PAIR;
+               set_pair_instruction(c, &cur->U.P, &scratch);
+       }
+}
diff --git a/src/gallium/drivers/r300/compiler/radeon_program.c b/src/gallium/drivers/r300/compiler/radeon_program.c
new file mode 100644 (file)
index 0000000..fe5756e
--- /dev/null
@@ -0,0 +1,225 @@
+/*
+ * Copyright (C) 2008 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_program.h"
+
+#include <stdio.h>
+
+#include "radeon_compiler.h"
+#include "radeon_dataflow.h"
+
+
+/**
+ * Run a list of local transformations over every instruction of the
+ * program.
+ *
+ * \param c     compiler whose c->Program.Instructions list is walked
+ * \param user  array of struct radeon_program_transformation, terminated
+ *              by an entry whose 'function' pointer is NULL
+ *
+ * For every instruction the transformations are tried in array order; the
+ * first one that returns non-zero ends the search for that instruction.
+ * This function does nothing further for an instruction that no
+ * transformation accepts.
+ *
+ * \note The walk steps to the following instruction before any
+ * transformation is called, so instructions that a transformation inserts
+ * directly before or after the current one are not visited themselves.
+ *
+ * \note The transform is called 'local' because it can only look at
+ * one instruction at a time.
+ */
+void rc_local_transform(
+       struct radeon_compiler * c,
+       void *user)
+{
+       struct radeon_program_transformation *transformations =
+               (struct radeon_program_transformation*)user;
+       struct rc_instruction * inst = c->Program.Instructions.Next;
+
+       while(inst != &c->Program.Instructions) {
+               struct rc_instruction * current = inst;
+               int i;
+
+               /* Advance first; 'current' is what the callbacks see. */
+               inst = inst->Next;
+
+               for(i = 0; transformations[i].function; ++i) {
+                       struct radeon_program_transformation* t = transformations + i;
+
+                       /* Non-zero return: this transformation handled it. */
+                       if (t->function(c, current, t->userData))
+                               break;
+               }
+       }
+}
+
+/** Callback state for rc_get_used_temporaries(). */
+struct get_used_temporaries_data {
+       /** One mask byte per temporary index; bits are OR-ed in. */
+       unsigned char * Used;
+       /** Number of entries in Used; larger indices are ignored. */
+       unsigned int UsedLength;
+};
+
+/**
+ * Read/write mask callback: OR 'mask' into the usage byte of a temporary
+ * register. Accesses to other register files, and indices beyond the end
+ * of the usage array, are ignored.
+ */
+static void get_used_temporaries_cb(
+       void * userdata,
+       struct rc_instruction * inst,
+       rc_register_file file,
+       unsigned int index,
+       unsigned int mask)
+{
+       struct get_used_temporaries_data * state = userdata;
+
+       if (file == RC_FILE_TEMPORARY && index < state->UsedLength)
+               state->Used[index] |= mask;
+}
+
+/**
+ * Fill 'used' with one writemask per temporary register, marking the
+ * components that the program reads or writes. Intended to be paired with
+ * rc_find_free_temporary_list as a cheaper alternative to calling
+ * rc_find_free_temporary repeatedly.
+ *
+ * @param used         array that receives the masks. Bits are only OR-ed
+ *                     in; the array is NOT cleared here, so the caller
+ *                     must initialize it.
+ * @param used_length  number of entries in 'used'
+ */
+void rc_get_used_temporaries(
+       struct radeon_compiler * c,
+       unsigned char * used,
+       unsigned int used_length)
+{
+       struct get_used_temporaries_data state;
+       struct rc_instruction * cur = c->Program.Instructions.Next;
+
+       state.Used = used;
+       state.UsedLength = used_length;
+
+       while (cur != &c->Program.Instructions) {
+               rc_for_all_reads_mask(cur, get_used_temporaries_cb, &state);
+               rc_for_all_writes_mask(cur, get_used_temporaries_cb, &state);
+               cur = cur->Next;
+       }
+}
+
+/* Search a list of used temporaries for a free one
+ * \sa rc_get_used_temporaries
+ * @note If this function finds a free temporary, it will mark it as used
+ * in the used temporary list (param 'used')
+ * @param c not used by this function
+ * @param used list of used temporaries
+ * @param used_length number of items in param 'used'
+ * @param mask which components must be free in the temporary index that is
+ * returned.
+ * @return -1 If there are no more free temporaries, otherwise the index of
+ * a temporary register where the components specified in param 'mask' are
+ * not being used.
+ */
+int rc_find_free_temporary_list(
+       struct radeon_compiler * c,
+       unsigned char * used,
+       unsigned int used_length,
+       unsigned int mask)
+{
+       /* Unsigned index: it is compared against the unsigned length. */
+       unsigned int i;
+       for(i = 0; i < used_length; i++) {
+               /* Free means: none of the requested bits is set yet. */
+               if ((~used[i] & mask) == mask) {
+                       used[i] |= mask;
+                       return (int)i;
+               }
+       }
+       return -1;
+}
+
+/**
+ * Find a temporary register that the program does not use in any
+ * component.
+ *
+ * The whole program is rescanned on every call.
+ *
+ * @return index of the free temporary. If none is available an error is
+ * raised on the compiler and 0 is returned.
+ */
+unsigned int rc_find_free_temporary(struct radeon_compiler * c)
+{
+       unsigned char used[RC_REGISTER_MAX_INDEX];
+       int index;
+
+       memset(used, 0, sizeof(used));
+       rc_get_used_temporaries(c, used, RC_REGISTER_MAX_INDEX);
+
+       index = rc_find_free_temporary_list(c, used, RC_REGISTER_MAX_INDEX,
+                                                               RC_MASK_XYZW);
+       if (index >= 0)
+               return index;
+
+       rc_error(c, "Ran out of temporary registers\n");
+       return 0;
+}
+
+
+/**
+ * Allocate an instruction from the compiler's memory pool.
+ *
+ * The instruction is zeroed and then given RC_OPCODE_ILLEGAL_OPCODE, a full
+ * XYZW destination write mask and the XYZW swizzle on all three sources.
+ * It is not linked into any list.
+ */
+struct rc_instruction *rc_alloc_instruction(struct radeon_compiler * c)
+{
+       struct rc_instruction * fresh = memory_pool_malloc(&c->Pool, sizeof(*fresh));
+       int src;
+
+       memset(fresh, 0, sizeof(*fresh));
+
+       fresh->U.I.Opcode = RC_OPCODE_ILLEGAL_OPCODE;
+       fresh->U.I.DstReg.WriteMask = RC_MASK_XYZW;
+       for (src = 0; src < 3; src++)
+               fresh->U.I.SrcReg[src].Swizzle = RC_SWIZZLE_XYZW;
+
+       return fresh;
+}
+
+/**
+ * Link 'inst' into a doubly linked instruction list directly behind
+ * 'after'.
+ */
+void rc_insert_instruction(struct rc_instruction * after, struct rc_instruction * inst)
+{
+       struct rc_instruction * following = after->Next;
+
+       /* First aim the new node at both neighbours ... */
+       inst->Prev = after;
+       inst->Next = following;
+
+       /* ... then aim the neighbours at the new node. */
+       after->Next = inst;
+       inst->Next->Prev = inst;
+}
+
+/**
+ * Allocate a default-initialized instruction with rc_alloc_instruction()
+ * and link it directly behind 'after'.
+ *
+ * @return the new instruction
+ */
+struct rc_instruction *rc_insert_new_instruction(struct radeon_compiler * c, struct rc_instruction * after)
+{
+       struct rc_instruction * fresh;
+
+       fresh = rc_alloc_instruction(c);
+       rc_insert_instruction(after, fresh);
+       return fresh;
+}
+
+/**
+ * Unlink 'inst' from its doubly linked list by joining its two
+ * neighbours. The Prev/Next pointers of 'inst' itself are left unchanged
+ * and its memory is not released here.
+ */
+void rc_remove_instruction(struct rc_instruction * inst)
+{
+       inst->Prev->Next = inst->Next;
+       inst->Next->Prev = inst->Prev;
+}
+
+/**
+ * Renumber the IP field of every instruction, starting at 0 in list
+ * order, and store the marker 0xcafedead in the IP of the list head.
+ *
+ * @return the number of instructions in the program
+ */
+unsigned int rc_recompute_ips(struct radeon_compiler * c)
+{
+       struct rc_instruction * const head = &c->Program.Instructions;
+       struct rc_instruction * cur = head->Next;
+       unsigned int count = 0;
+
+       while (cur != head) {
+               cur->IP = count;
+               count++;
+               cur = cur->Next;
+       }
+
+       head->IP = 0xcafedead;
+
+       return count;
+}
diff --git a/src/gallium/drivers/r300/compiler/radeon_program.h b/src/gallium/drivers/r300/compiler/radeon_program.h
new file mode 100644 (file)
index 0000000..b899ecc
--- /dev/null
@@ -0,0 +1,206 @@
+/*
+ * Copyright (C) 2008 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __RADEON_PROGRAM_H_
+#define __RADEON_PROGRAM_H_
+
+#include <stdint.h>
+#include <string.h>
+
+#include "radeon_opcodes.h"
+#include "radeon_code.h"
+#include "radeon_program_constants.h"
+#include "radeon_program_pair.h"
+
+struct radeon_compiler;
+
+/** A source operand of an instruction, packed into bitfields. */
+struct rc_src_register {
+       /** Register file the operand is read from (an RC_FILE_* value). */
+       unsigned int File:4;
+
+       /** Negative values may be used for relative addressing. */
+       signed int Index:(RC_REGISTER_INDEX_BITS+1);
+       /** Non-zero if the operand is addressed relatively. */
+       unsigned int RelAddr:1;
+
+       /** Four swizzle selectors packed into 12 bits; read with GET_SWZ. */
+       unsigned int Swizzle:12;
+
+       /** Take the component-wise absolute value */
+       unsigned int Abs:1;
+
+       /** Post-Abs negation. */
+       unsigned int Negate:4;
+};
+
+/** The destination operand of an instruction, packed into bitfields. */
+struct rc_dst_register {
+       /** Register file that is written (an RC_FILE_* value). */
+       unsigned int File:3;
+       /** Register index within the file. */
+       unsigned int Index:RC_REGISTER_INDEX_BITS;
+       /** Components that are written, as an RC_MASK_* bit set. */
+       unsigned int WriteMask:4;
+};
+
+/** A presubtract operation attached to an instruction. */
+struct rc_presub_instruction {
+       /** Which presubtract operation; RC_PRESUB_NONE when there is none. */
+       rc_presubtract_op Opcode;
+       /** Operands of the presubtract operation (at most two). */
+       struct rc_src_register SrcReg[2];
+};
+
+/**
+ * The payload of an instruction in its normal (non-paired) form: up to
+ * three source operands, one destination, the opcode and its modifiers.
+ * It is stored in struct rc_instruction as U.I.
+ *
+ * This instruction format is intended to be expanded for hardware-specific
+ * trickery. At different stages of compilation, a different set of
+ * instruction types may be valid.
+ */
+struct rc_sub_instruction {
+       struct rc_src_register SrcReg[3];
+       struct rc_dst_register DstReg;
+
+       /**
+        * Opcode of this instruction, according to \ref rc_opcode enums.
+        */
+       unsigned int Opcode:8;
+
+       /**
+        * Saturate each value of the result to the range [0,1] or [-1,1],
+        * according to \ref rc_saturate_mode enums.
+        */
+       unsigned int SaturateMode:2;
+
+       /**
+        * Writing to the special register RC_SPECIAL_ALU_RESULT
+        */
+       /*@{*/
+       unsigned int WriteALUResult:2;
+       unsigned int ALUResultCompare:3;
+       /*@}*/
+
+       /**
+        * \name Extra fields for TEX, TXB, TXD, TXL, TXP instructions.
+        */
+       /*@{*/
+       /** Source texture unit. */
+       unsigned int TexSrcUnit:5;
+
+       /** Source texture target, one of the \ref rc_texture_target enums */
+       unsigned int TexSrcTarget:3;
+
+       /** True if tex instruction should do shadow comparison */
+       unsigned int TexShadow:1;
+
+       /**R500 Only.  How to swizzle the result of a TEX lookup*/
+       unsigned int TexSwizzle:12;
+       /*@}*/
+
+       /** This holds information about the presubtract operation used by
+        * this instruction. */
+       struct rc_presub_instruction PreSub;
+};
+
+/** Selects which member of the union U in struct rc_instruction is valid. */
+typedef enum {
+       /** U.I (struct rc_sub_instruction) is valid. */
+       RC_INSTRUCTION_NORMAL = 0,
+       /** U.P (struct rc_pair_instruction) is valid. */
+       RC_INSTRUCTION_PAIR
+} rc_instruction_type;
+
+/**
+ * Node of the doubly linked instruction list maintained by the compiler.
+ * The payload is either a normal or a pair instruction, as told by Type.
+ */
+struct rc_instruction {
+       struct rc_instruction * Prev;
+       struct rc_instruction * Next;
+
+       /** Tells which member of U holds the instruction. */
+       rc_instruction_type Type;
+       union {
+               struct rc_sub_instruction I;
+               struct rc_pair_instruction P;
+       } U;
+
+       /**
+        * Warning: IPs are not stable. If you want to use them,
+        * you need to recompute them at the beginning of each pass
+        * using \ref rc_recompute_ips
+        */
+       unsigned int IP;
+};
+
+/** A program: its instruction list, constants and input/output masks. */
+struct rc_program {
+       /**
+        * Head of the circular instruction list; it is not an instruction
+        * itself and marks the end when walking the list.
+        * Instructions.Next points to the first instruction,
+        * Instructions.Prev points to the last instruction.
+        */
+       struct rc_instruction Instructions;
+
+       /* Long term, we should probably remove InputsRead & OutputsWritten,
+        * since updating dependent state can be fragile, and they aren't
+        * actually used very often. */
+       uint32_t InputsRead;
+       uint32_t OutputsWritten;
+       uint32_t ShadowSamplers; /**< Texture units used for shadow sampling. */
+
+       struct rc_constant_list Constants;
+};
+
+/**
+ * A transformation that can be passed to \ref rc_local_transform.
+ *
+ * The function will be called once for each instruction.
+ * It has to either emit the appropriate transformed code for the instruction
+ * and return true, or return false if it doesn't understand the
+ * instruction.
+ *
+ * The function gets passed the userData as last parameter.
+ *
+ * rc_local_transform takes an array of these and stops at the first entry
+ * whose function pointer is NULL.
+ */
+struct radeon_program_transformation {
+       /** Callback; a non-zero return means the instruction was handled. */
+       int (*function)(
+               struct radeon_compiler*,
+               struct rc_instruction*,
+               void*);
+       /** Opaque pointer passed through to function as its last argument. */
+       void *userData;
+};
+
+/* Run the NULL-terminated radeon_program_transformation array passed as
+ * 'user' over every instruction of the program. */
+void rc_local_transform(
+       struct radeon_compiler *c,
+       void *user);
+
+/* OR the component masks of all temporaries the program reads or writes
+ * into 'used'; the caller must clear the array beforehand. */
+void rc_get_used_temporaries(
+       struct radeon_compiler * c,
+       unsigned char * used,
+       unsigned int used_length);
+
+/* Return the first index whose 'mask' components are all unused and mark
+ * them used, or -1 if there is none. */
+int rc_find_free_temporary_list(
+       struct radeon_compiler * c,
+       unsigned char * used,
+       unsigned int used_length,
+       unsigned int mask);
+
+/* Return the index of a completely unused temporary; raises a compiler
+ * error and returns 0 if there is none. */
+unsigned int rc_find_free_temporary(struct radeon_compiler * c);
+
+/* Instruction list helpers. rc_insert_* link the instruction directly
+ * behind 'after'; rc_remove_instruction only unlinks. */
+struct rc_instruction *rc_alloc_instruction(struct radeon_compiler * c);
+struct rc_instruction *rc_insert_new_instruction(struct radeon_compiler * c, struct rc_instruction * after);
+void rc_insert_instruction(struct rc_instruction * after, struct rc_instruction * inst);
+void rc_remove_instruction(struct rc_instruction * inst);
+
+/* Renumber all instruction IPs from 0 and return the instruction count. */
+unsigned int rc_recompute_ips(struct radeon_compiler * c);
+
+/* The two functions below are not defined in radeon_program.c. */
+void rc_print_program(const struct rc_program *prog);
+
+rc_swizzle rc_mask_to_swizzle(unsigned int mask);
+#endif
diff --git a/src/gallium/drivers/r300/compiler/radeon_program_alu.c b/src/gallium/drivers/r300/compiler/radeon_program_alu.c
new file mode 100644 (file)
index 0000000..9fc9911
--- /dev/null
@@ -0,0 +1,1154 @@
+/*
+ * Copyright (C) 2008 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/**
+ * @file
+ *
+ * Shareable transformations that transform "special" ALU instructions
+ * into ALU instructions that are supported by hardware.
+ *
+ */
+
+#include "radeon_program_alu.h"
+
+#include "radeon_compiler.h"
+#include "radeon_compiler_util.h"
+
+
+/**
+ * Insert a new one-source instruction right after \p after and return it.
+ */
+static struct rc_instruction *emit1(
+       struct radeon_compiler * c, struct rc_instruction * after,
+       rc_opcode Opcode, rc_saturate_mode Saturate, struct rc_dst_register DstReg,
+       struct rc_src_register SrcReg)
+{
+       struct rc_instruction *new_inst = rc_insert_new_instruction(c, after);
+
+       new_inst->U.I.SrcReg[0] = SrcReg;
+       new_inst->U.I.DstReg = DstReg;
+       new_inst->U.I.SaturateMode = Saturate;
+       new_inst->U.I.Opcode = Opcode;
+
+       return new_inst;
+}
+
+/**
+ * Insert a new two-source instruction right after \p after and return it.
+ */
+static struct rc_instruction *emit2(
+       struct radeon_compiler * c, struct rc_instruction * after,
+       rc_opcode Opcode, rc_saturate_mode Saturate, struct rc_dst_register DstReg,
+       struct rc_src_register SrcReg0, struct rc_src_register SrcReg1)
+{
+       struct rc_instruction *new_inst = rc_insert_new_instruction(c, after);
+
+       new_inst->U.I.SrcReg[1] = SrcReg1;
+       new_inst->U.I.SrcReg[0] = SrcReg0;
+       new_inst->U.I.DstReg = DstReg;
+       new_inst->U.I.SaturateMode = Saturate;
+       new_inst->U.I.Opcode = Opcode;
+
+       return new_inst;
+}
+
+/**
+ * Insert a new three-source instruction right after \p after and return it.
+ */
+static struct rc_instruction *emit3(
+       struct radeon_compiler * c, struct rc_instruction * after,
+       rc_opcode Opcode, rc_saturate_mode Saturate, struct rc_dst_register DstReg,
+       struct rc_src_register SrcReg0, struct rc_src_register SrcReg1,
+       struct rc_src_register SrcReg2)
+{
+       struct rc_instruction *new_inst = rc_insert_new_instruction(c, after);
+
+       new_inst->U.I.SrcReg[2] = SrcReg2;
+       new_inst->U.I.SrcReg[1] = SrcReg1;
+       new_inst->U.I.SrcReg[0] = SrcReg0;
+       new_inst->U.I.DstReg = DstReg;
+       new_inst->U.I.SaturateMode = Saturate;
+       new_inst->U.I.Opcode = Opcode;
+
+       return new_inst;
+}
+
+/** Build a destination operand for temporary \p index with write mask \p mask. */
+static struct rc_dst_register dstregtmpmask(int index, int mask)
+{
+       struct rc_dst_register tmp_dst = {
+               .File = RC_FILE_TEMPORARY,
+               .Index = index,
+               .WriteMask = mask
+       };
+       return tmp_dst;
+}
+
+/* Register-less source operand using the all-zero swizzle; used below
+ * wherever a transform needs the constant 0. */
+static const struct rc_src_register builtin_zero = {
+       .File = RC_FILE_NONE,
+       .Index = 0,
+       .Swizzle = RC_SWIZZLE_0000
+};
+/* Register-less source operand using the all-one swizzle; used below
+ * wherever a transform needs the constant 1. */
+static const struct rc_src_register builtin_one = {
+       .File = RC_FILE_NONE,
+       .Index = 0,
+       .Swizzle = RC_SWIZZLE_1111
+};
+/* Template with the identity XYZW swizzle; srcreg() and srcregswz() start
+ * from it and then fill in File/Index. */
+static const struct rc_src_register srcreg_undefined = {
+       .File = RC_FILE_NONE,
+       .Index = 0,
+       .Swizzle = RC_SWIZZLE_XYZW
+};
+
+/** Source operand reading register \p index of \p file with the XYZW swizzle. */
+static struct rc_src_register srcreg(int file, int index)
+{
+       struct rc_src_register operand = srcreg_undefined;
+
+       operand.Index = index;
+       operand.File = file;
+
+       return operand;
+}
+
+/** Source operand reading register \p index of \p file with swizzle \p swz. */
+static struct rc_src_register srcregswz(int file, int index, int swz)
+{
+       struct rc_src_register operand = srcreg_undefined;
+
+       operand.Swizzle = swz;
+       operand.Index = index;
+       operand.File = file;
+
+       return operand;
+}
+
+/** Return \p reg with the Abs modifier set and any negation removed. */
+static struct rc_src_register absolute(struct rc_src_register reg)
+{
+       /* reg is passed by value, so it can be edited in place. */
+       reg.Negate = RC_MASK_NONE;
+       reg.Abs = 1;
+       return reg;
+}
+
+/** Return \p reg with the negate bit of all four channels flipped. */
+static struct rc_src_register negate(struct rc_src_register reg)
+{
+       /* reg is passed by value, so it can be edited in place. */
+       reg.Negate ^= RC_MASK_XYZW;
+       return reg;
+}
+
+/** Return \p reg with the swizzle (x, y, z, w) applied on top of its own swizzle. */
+static struct rc_src_register swizzle(struct rc_src_register reg,
+               rc_swizzle x, rc_swizzle y, rc_swizzle z, rc_swizzle w)
+{
+       reg.Swizzle = combine_swizzles4(reg.Swizzle, x, y, z, w);
+       return reg;
+}
+
+/** Return \p reg with channel \p x replicated into all four channels. */
+static struct rc_src_register swizzle_smear(struct rc_src_register reg,
+               rc_swizzle x)
+{
+       reg.Swizzle = combine_swizzles4(reg.Swizzle, x, x, x, x);
+       return reg;
+}
+
+/** Replicate the X channel of \p reg into all channels. */
+static struct rc_src_register swizzle_xxxx(struct rc_src_register reg)
+{
+       const rc_swizzle chan = RC_SWIZZLE_X;
+       return swizzle_smear(reg, chan);
+}
+
+/** Replicate the Y channel of \p reg into all channels. */
+static struct rc_src_register swizzle_yyyy(struct rc_src_register reg)
+{
+       const rc_swizzle chan = RC_SWIZZLE_Y;
+       return swizzle_smear(reg, chan);
+}
+
+/** Replicate the Z channel of \p reg into all channels. */
+static struct rc_src_register swizzle_zzzz(struct rc_src_register reg)
+{
+       const rc_swizzle chan = RC_SWIZZLE_Z;
+       return swizzle_smear(reg, chan);
+}
+
+/** Replicate the W channel of \p reg into all channels. */
+static struct rc_src_register swizzle_wwww(struct rc_src_register reg)
+{
+       const rc_swizzle chan = RC_SWIZZLE_W;
+       return swizzle_smear(reg, chan);
+}
+
+/**
+ * Tell whether the destination register of \p inst may double as scratch
+ * space: it must be a temporary that none of the instruction's own source
+ * operands reads.
+ *
+ * @return 1 if reusable, 0 otherwise.
+ */
+static int is_dst_safe_to_reuse(struct rc_instruction *inst)
+{
+       const struct rc_opcode_info *opinfo = rc_get_opcode_info(inst->U.I.Opcode);
+       unsigned src;
+
+       assert(opinfo->HasDstReg);
+
+       if (inst->U.I.DstReg.File != RC_FILE_TEMPORARY)
+               return 0;
+
+       for (src = 0; src < opinfo->NumSrcRegs; src++) {
+               if (inst->U.I.SrcReg[src].File != RC_FILE_TEMPORARY)
+                       continue;
+               if (inst->U.I.SrcReg[src].Index == inst->U.I.DstReg.Index)
+                       return 0;
+       }
+
+       return 1;
+}
+
+/**
+ * Pick a scratch temporary for lowering \p inst: the instruction's own
+ * destination register when that is safe, a free temporary otherwise.
+ * The returned operand carries the write mask of \p inst.
+ */
+static struct rc_dst_register try_to_reuse_dst(struct radeon_compiler *c,
+                                              struct rc_instruction *inst)
+{
+       unsigned scratch = is_dst_safe_to_reuse(inst)
+               ? inst->U.I.DstReg.Index
+               : rc_find_free_temporary(c);
+
+       return dstregtmpmask(scratch, inst->U.I.DstReg.WriteMask);
+}
+
+/** ABS dst, src  ->  MOV dst, |src| */
+static void transform_ABS(struct radeon_compiler* c,
+       struct rc_instruction* inst)
+{
+       struct rc_src_register abs_src = absolute(inst->U.I.SrcReg[0]);
+
+       emit1(c, inst->Prev, RC_OPCODE_MOV, inst->U.I.SaturateMode, inst->U.I.DstReg, abs_src);
+       rc_remove_instruction(inst);
+}
+
+/**
+ * CEIL dst, x  ->  FRC tmp, -x ; ADD dst, x, tmp
+ *
+ * Derivation: ceil(x) = -floor(-x) = -(-x - frac(-x)) = x + frac(-x).
+ */
+static void transform_CEIL(struct radeon_compiler* c,
+       struct rc_instruction* inst)
+{
+       struct rc_dst_register frac_dst = try_to_reuse_dst(c, inst);
+       struct rc_src_register x = inst->U.I.SrcReg[0];
+       struct rc_src_register frac_src = srcreg(RC_FILE_TEMPORARY, frac_dst.Index);
+
+       /* tmp = frac(-x) */
+       emit1(c, inst->Prev, RC_OPCODE_FRC, 0, frac_dst, negate(x));
+       /* dst = x + tmp */
+       emit2(c, inst->Prev, RC_OPCODE_ADD, inst->U.I.SaturateMode, inst->U.I.DstReg,
+               x, frac_src);
+       rc_remove_instruction(inst);
+}
+
+/**
+ * CLAMP dst, src, min, max  ->  MIN tmp, src, max ; MAX dst, tmp, min
+ */
+static void transform_CLAMP(struct radeon_compiler *c,
+       struct rc_instruction *inst)
+{
+       struct rc_dst_register capped_dst = try_to_reuse_dst(c, inst);
+       struct rc_src_register value = inst->U.I.SrcReg[0];
+       struct rc_src_register lower = inst->U.I.SrcReg[1];
+       struct rc_src_register upper = inst->U.I.SrcReg[2];
+       struct rc_src_register capped = srcreg(RC_FILE_TEMPORARY, capped_dst.Index);
+
+       emit2(c, inst->Prev, RC_OPCODE_MIN, 0, capped_dst, value, upper);
+       emit2(c, inst->Prev, RC_OPCODE_MAX, inst->U.I.SaturateMode, inst->U.I.DstReg,
+               capped, lower);
+       rc_remove_instruction(inst);
+}
+
+/**
+ * DP2 dst, a, b  ->  DP3 dst, a.xy00, b.xy00
+ *
+ * The Z and W channels of both operands are forced to the constant zero
+ * so that they do not contribute to the dot product.
+ */
+static void transform_DP2(struct radeon_compiler* c,
+       struct rc_instruction* inst)
+{
+       struct rc_src_register src0 = inst->U.I.SrcReg[0];
+       struct rc_src_register src1 = inst->U.I.SrcReg[1];
+       /* Drop negation on Z/W, then overwrite the Z and W swizzle fields
+        * (3 bits each, starting at bit 3*2, hence the 6-bit mask 63). */
+       src0.Negate &= ~(RC_MASK_Z | RC_MASK_W);
+       src0.Swizzle &= ~(63 << (3 * 2));
+       src0.Swizzle |= (RC_SWIZZLE_ZERO << (3 * 2)) | (RC_SWIZZLE_ZERO << (3 * 3));
+       src1.Negate &= ~(RC_MASK_Z | RC_MASK_W);
+       src1.Swizzle &= ~(63 << (3 * 2));
+       src1.Swizzle |= (RC_SWIZZLE_ZERO << (3 * 2)) | (RC_SWIZZLE_ZERO << (3 * 3));
+       emit2(c, inst->Prev, RC_OPCODE_DP3, inst->U.I.SaturateMode, inst->U.I.DstReg, src0, src1);
+       rc_remove_instruction(inst);
+}
+
+/**
+ * DPH dst, a, b  ->  DP4 dst, a.xyz1, b
+ *
+ * The W channel of the first operand is replaced by the constant one.
+ */
+static void transform_DPH(struct radeon_compiler* c,
+       struct rc_instruction* inst)
+{
+       struct rc_src_register lhs = inst->U.I.SrcReg[0];
+
+       /* The W swizzle field is the 3 bits starting at bit 3*3. */
+       lhs.Swizzle = (lhs.Swizzle & ~(7 << (3 * 3))) | (RC_SWIZZLE_ONE << (3 * 3));
+       lhs.Negate &= ~RC_MASK_W;
+
+       emit2(c, inst->Prev, RC_OPCODE_DP4, inst->U.I.SaturateMode, inst->U.I.DstReg,
+               lhs, inst->U.I.SrcReg[1]);
+       rc_remove_instruction(inst);
+}
+
+/**
+ * DST produces [1, src0.y*src1.y, src0.z, src1.w].
+ *
+ * Lowered to a single MUL in which each operand supplies the constant one
+ * for the channels it does not contribute to.
+ */
+static void transform_DST(struct radeon_compiler* c,
+       struct rc_instruction* inst)
+{
+       struct rc_src_register lhs = swizzle(inst->U.I.SrcReg[0],
+               RC_SWIZZLE_ONE, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_ONE);
+       struct rc_src_register rhs = swizzle(inst->U.I.SrcReg[1],
+               RC_SWIZZLE_ONE, RC_SWIZZLE_Y, RC_SWIZZLE_ONE, RC_SWIZZLE_W);
+
+       emit2(c, inst->Prev, RC_OPCODE_MUL, inst->U.I.SaturateMode, inst->U.I.DstReg,
+               lhs, rhs);
+       rc_remove_instruction(inst);
+}
+
+/** FLR dst, x  ->  FRC tmp, x ; ADD dst, x, -tmp   (floor(x) = x - frac(x)) */
+static void transform_FLR(struct radeon_compiler* c,
+       struct rc_instruction* inst)
+{
+       struct rc_dst_register frac_dst = try_to_reuse_dst(c, inst);
+       struct rc_src_register x = inst->U.I.SrcReg[0];
+       struct rc_src_register frac_src = srcreg(RC_FILE_TEMPORARY, frac_dst.Index);
+
+       emit1(c, inst->Prev, RC_OPCODE_FRC, 0, frac_dst, x);
+       emit2(c, inst->Prev, RC_OPCODE_ADD, inst->U.I.SaturateMode, inst->U.I.DstReg,
+               x, negate(frac_src));
+       rc_remove_instruction(inst);
+}
+
+/**
+ * Definition of LIT (from ARB_fragment_program):
+ *
+ *  tmp = VectorLoad(op0);
+ *  if (tmp.x < 0) tmp.x = 0;
+ *  if (tmp.y < 0) tmp.y = 0;
+ *  if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon);
+ *  else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon;
+ *  result.x = 1.0;
+ *  result.y = tmp.x;
+ *  result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0;
+ *  result.w = 1.0;
+ *
+ * The longest path of computation is the one leading to result.z,
+ * consisting of 5 operations. This implementation of LIT takes
+ * 5 slots, if the subsequent optimization passes are clever enough
+ * to pair instructions correctly.
+ *
+ * All intermediate values live in one temporary: the destination register
+ * itself when it is a temporary written with the full XYZW mask, otherwise
+ * a free temporary whose contents are copied to the real destination by a
+ * MOV placed after the lowered sequence.
+ */
+static void transform_LIT(struct radeon_compiler* c,
+       struct rc_instruction* inst)
+{
+       unsigned int constant;
+       unsigned int constant_swizzle;
+       unsigned int temp;
+       struct rc_src_register srctemp;
+
+       /* Lower clamp bound for the exponent; the upper bound is obtained
+        * by negating the same constant. */
+       constant = rc_constants_add_immediate_scalar(&c->Program.Constants, -127.999999, &constant_swizzle);
+
+       if (inst->U.I.DstReg.WriteMask != RC_MASK_XYZW || inst->U.I.DstReg.File != RC_FILE_TEMPORARY) {
+               struct rc_instruction * inst_mov;
+
+               /* Emitted after inst: copies the scratch temporary to the
+                * original destination, honouring its file and write mask. */
+               inst_mov = emit1(c, inst,
+                       RC_OPCODE_MOV, 0, inst->U.I.DstReg,
+                       srcreg(RC_FILE_TEMPORARY, rc_find_free_temporary(c)));
+
+               /* Redirect the LIT itself to the scratch temporary. */
+               inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
+               inst->U.I.DstReg.Index = inst_mov->U.I.SrcReg[0].Index;
+               inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
+       }
+
+       temp = inst->U.I.DstReg.Index;
+       srctemp = srcreg(RC_FILE_TEMPORARY, temp);
+
+       /* tmp.x = max(0.0, Src.x); */
+       /* tmp.y = max(0.0, Src.y); */
+       /* tmp.w = max(Src.w, -128+eps); */
+       emit2(c, inst->Prev, RC_OPCODE_MAX, 0,
+               dstregtmpmask(temp, RC_MASK_XYW),
+               inst->U.I.SrcReg[0],
+               swizzle(srcreg(RC_FILE_CONSTANT, constant),
+                       RC_SWIZZLE_ZERO, RC_SWIZZLE_ZERO, RC_SWIZZLE_ZERO, constant_swizzle&3));
+       /* tmp.z = min(tmp.w, 128-eps), i.e. the fully clamped exponent */
+       emit2(c, inst->Prev, RC_OPCODE_MIN, 0,
+               dstregtmpmask(temp, RC_MASK_Z),
+               swizzle_wwww(srctemp),
+               negate(srcregswz(RC_FILE_CONSTANT, constant, constant_swizzle)));
+
+       /* tmp.w = Pow(tmp.y, tmp.z), computed as exp2(log2(tmp.y) * tmp.z) */
+       emit1(c, inst->Prev, RC_OPCODE_LG2, 0,
+               dstregtmpmask(temp, RC_MASK_W),
+               swizzle_yyyy(srctemp));
+       emit2(c, inst->Prev, RC_OPCODE_MUL, 0,
+               dstregtmpmask(temp, RC_MASK_W),
+               swizzle_wwww(srctemp),
+               swizzle_zzzz(srctemp));
+       emit1(c, inst->Prev, RC_OPCODE_EX2, 0,
+               dstregtmpmask(temp, RC_MASK_W),
+               swizzle_wwww(srctemp));
+
+       /* tmp.z = (tmp.x > 0) ? tmp.w : 0.0 */
+       emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode,
+               dstregtmpmask(temp, RC_MASK_Z),
+               negate(swizzle_xxxx(srctemp)),
+               swizzle_wwww(srctemp),
+               builtin_zero);
+
+       /* tmp.x, tmp.y, tmp.w = 1.0, tmp.x, 1.0 */
+       emit1(c, inst->Prev, RC_OPCODE_MOV, inst->U.I.SaturateMode,
+               dstregtmpmask(temp, RC_MASK_XYW),
+               swizzle(srctemp, RC_SWIZZLE_ONE, RC_SWIZZLE_X, RC_SWIZZLE_ONE, RC_SWIZZLE_ONE));
+
+       rc_remove_instruction(inst);
+}
+
+/**
+ * LRP dst, a, b, c  ->  ADD tmp, b, -c ; MAD dst, a, tmp, c
+ *
+ * i.e. a*b + (1-a)*c rewritten as a*(b-c) + c.
+ */
+static void transform_LRP(struct radeon_compiler* c,
+       struct rc_instruction* inst)
+{
+       struct rc_dst_register delta_dst = try_to_reuse_dst(c, inst);
+       struct rc_src_register weight = inst->U.I.SrcReg[0];
+       struct rc_src_register first = inst->U.I.SrcReg[1];
+       struct rc_src_register second = inst->U.I.SrcReg[2];
+       struct rc_src_register delta = srcreg(RC_FILE_TEMPORARY, delta_dst.Index);
+
+       emit2(c, inst->Prev, RC_OPCODE_ADD, 0, delta_dst, first, negate(second));
+       emit3(c, inst->Prev, RC_OPCODE_MAD, inst->U.I.SaturateMode, inst->U.I.DstReg,
+               weight, delta, second);
+
+       rc_remove_instruction(inst);
+}
+
+/**
+ * POW dst, a, b  ->  LG2 tmp.w, a.x ; MUL tmp.w, tmp.w, b.x ; EX2 dst, tmp.w
+ *
+ * The intermediate value always lives in the W channel of the scratch
+ * temporary. The destination register may therefore only serve as scratch
+ * when the instruction itself writes W; otherwise the scratch write would
+ * clobber a channel the POW must leave untouched, so a free temporary is
+ * used instead.
+ */
+static void transform_POW(struct radeon_compiler* c,
+       struct rc_instruction* inst)
+{
+       struct rc_dst_register tempdst = try_to_reuse_dst(c, inst);
+       struct rc_src_register tempsrc;
+
+       /* Never scribble on dst.w unless the instruction was going to
+        * overwrite it anyway. */
+       if (!(inst->U.I.DstReg.WriteMask & RC_MASK_W))
+               tempdst.Index = rc_find_free_temporary(c);
+
+       tempsrc = srcreg(RC_FILE_TEMPORARY, tempdst.Index);
+       tempdst.WriteMask = RC_MASK_W;
+       tempsrc.Swizzle = RC_SWIZZLE_WWWW;
+
+       emit1(c, inst->Prev, RC_OPCODE_LG2, 0, tempdst, swizzle_xxxx(inst->U.I.SrcReg[0]));
+       emit2(c, inst->Prev, RC_OPCODE_MUL, 0, tempdst, tempsrc, swizzle_xxxx(inst->U.I.SrcReg[1]));
+       emit1(c, inst->Prev, RC_OPCODE_EX2, inst->U.I.SaturateMode, inst->U.I.DstReg, tempsrc);
+
+       rc_remove_instruction(inst);
+}
+
+/** Make RSQ operate on the absolute value of its operand (in place). */
+static void transform_RSQ(struct radeon_compiler* c,
+       struct rc_instruction* inst)
+{
+       inst->U.I.SrcReg[0].Abs = 1;
+       inst->U.I.SrcReg[0].Negate = RC_MASK_NONE;
+}
+
+/** SEQ dst, a, b  ->  ADD tmp, a, -b ; CMP dst, -|tmp|, 0, 1 */
+static void transform_SEQ(struct radeon_compiler* c,
+       struct rc_instruction* inst)
+{
+       struct rc_dst_register diff_dst = try_to_reuse_dst(c, inst);
+       struct rc_src_register diff = srcreg(RC_FILE_TEMPORARY, diff_dst.Index);
+
+       emit2(c, inst->Prev, RC_OPCODE_ADD, 0, diff_dst,
+               inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1]));
+       /* -|a-b| is negative unless a == b */
+       emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
+               negate(absolute(diff)), builtin_zero, builtin_one);
+
+       rc_remove_instruction(inst);
+}
+
+/** SFL dst  ->  MOV dst, 0 */
+static void transform_SFL(struct radeon_compiler* c,
+       struct rc_instruction* inst)
+{
+       const struct rc_src_register zero = builtin_zero;
+
+       emit1(c, inst->Prev, RC_OPCODE_MOV, inst->U.I.SaturateMode, inst->U.I.DstReg, zero);
+       rc_remove_instruction(inst);
+}
+
+/** SGE dst, a, b  ->  ADD tmp, a, -b ; CMP dst, tmp, 0, 1 */
+static void transform_SGE(struct radeon_compiler* c,
+       struct rc_instruction* inst)
+{
+       struct rc_dst_register diff_dst = try_to_reuse_dst(c, inst);
+       struct rc_src_register diff = srcreg(RC_FILE_TEMPORARY, diff_dst.Index);
+
+       /* a - b < 0  ->  0, else 1 */
+       emit2(c, inst->Prev, RC_OPCODE_ADD, 0, diff_dst,
+               inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1]));
+       emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
+               diff, builtin_zero, builtin_one);
+
+       rc_remove_instruction(inst);
+}
+
+/** SGT dst, a, b  ->  ADD tmp, -a, b ; CMP dst, tmp, 1, 0 */
+static void transform_SGT(struct radeon_compiler* c,
+       struct rc_instruction* inst)
+{
+       struct rc_dst_register diff_dst = try_to_reuse_dst(c, inst);
+       struct rc_src_register diff = srcreg(RC_FILE_TEMPORARY, diff_dst.Index);
+
+       /* b - a < 0  ->  1, else 0 */
+       emit2(c, inst->Prev, RC_OPCODE_ADD, 0, diff_dst,
+               negate(inst->U.I.SrcReg[0]), inst->U.I.SrcReg[1]);
+       emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
+               diff, builtin_one, builtin_zero);
+
+       rc_remove_instruction(inst);
+}
+
+/** SLE dst, a, b  ->  ADD tmp, -a, b ; CMP dst, tmp, 0, 1 */
+static void transform_SLE(struct radeon_compiler* c,
+       struct rc_instruction* inst)
+{
+       struct rc_dst_register diff_dst = try_to_reuse_dst(c, inst);
+       struct rc_src_register diff = srcreg(RC_FILE_TEMPORARY, diff_dst.Index);
+
+       /* b - a < 0  ->  0, else 1 */
+       emit2(c, inst->Prev, RC_OPCODE_ADD, 0, diff_dst,
+               negate(inst->U.I.SrcReg[0]), inst->U.I.SrcReg[1]);
+       emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
+               diff, builtin_zero, builtin_one);
+
+       rc_remove_instruction(inst);
+}
+
+/** SLT dst, a, b  ->  ADD tmp, a, -b ; CMP dst, tmp, 1, 0 */
+static void transform_SLT(struct radeon_compiler* c,
+       struct rc_instruction* inst)
+{
+       struct rc_dst_register diff_dst = try_to_reuse_dst(c, inst);
+       struct rc_src_register diff = srcreg(RC_FILE_TEMPORARY, diff_dst.Index);
+
+       /* a - b < 0  ->  1, else 0 */
+       emit2(c, inst->Prev, RC_OPCODE_ADD, 0, diff_dst,
+               inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1]));
+       emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
+               diff, builtin_one, builtin_zero);
+
+       rc_remove_instruction(inst);
+}
+
+/** SNE dst, a, b  ->  ADD tmp, a, -b ; CMP dst, -|tmp|, 1, 0 */
+static void transform_SNE(struct radeon_compiler* c,
+       struct rc_instruction* inst)
+{
+       struct rc_dst_register diff_dst = try_to_reuse_dst(c, inst);
+       struct rc_src_register diff = srcreg(RC_FILE_TEMPORARY, diff_dst.Index);
+
+       emit2(c, inst->Prev, RC_OPCODE_ADD, 0, diff_dst,
+               inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1]));
+       /* -|a-b| is negative exactly when a != b */
+       emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
+               negate(absolute(diff)), builtin_one, builtin_zero);
+
+       rc_remove_instruction(inst);
+}
+
+/**
+ * SSG dst, x  (sign of x: -1, 0 or +1)
+ *
+ * Lowered to two CMPs producing 0/1 flags and one ADD combining them.
+ * The saturate mode of the original instruction is applied to the final
+ * ADD, as the other fragment transforms in this file do for their last
+ * instruction.
+ */
+static void transform_SSG(struct radeon_compiler* c,
+       struct rc_instruction* inst)
+{
+       /* result = sign(x)
+        *
+        *   CMP tmp0, -x, 1, 0
+        *   CMP tmp1, x, 1, 0
+        *   ADD result, tmp0, -tmp1;
+        */
+       struct rc_dst_register dst0;
+       unsigned tmp1;
+
+       /* 0 < x */
+       dst0 = try_to_reuse_dst(c, inst);
+       emit3(c, inst->Prev, RC_OPCODE_CMP, 0,
+             dst0,
+             negate(inst->U.I.SrcReg[0]),
+             builtin_one,
+             builtin_zero);
+
+       /* x < 0 */
+       /* Looked up only after the first CMP has been emitted, so that
+        * the scratch register it writes is already in use by then. */
+       tmp1 = rc_find_free_temporary(c);
+       emit3(c, inst->Prev, RC_OPCODE_CMP, 0,
+             dstregtmpmask(tmp1, inst->U.I.DstReg.WriteMask),
+             inst->U.I.SrcReg[0],
+             builtin_one,
+             builtin_zero);
+
+       /* Either both are zero, or one of them is one and the other is zero. */
+       /* result = tmp0 - tmp1 */
+       emit2(c, inst->Prev, RC_OPCODE_ADD, inst->U.I.SaturateMode,
+             inst->U.I.DstReg,
+             srcreg(RC_FILE_TEMPORARY, dst0.Index),
+             negate(srcreg(RC_FILE_TEMPORARY, tmp1)));
+
+       rc_remove_instruction(inst);
+}
+
+/** SUB dst, a, b  ->  ADD dst, a, -b   (rewritten in place) */
+static void transform_SUB(struct radeon_compiler* c,
+       struct rc_instruction* inst)
+{
+       struct rc_src_register subtrahend = inst->U.I.SrcReg[1];
+
+       inst->U.I.SrcReg[1] = negate(subtrahend);
+       inst->U.I.Opcode = RC_OPCODE_ADD;
+}
+
+/** SWZ is rewritten in place to a MOV; the operands are left untouched. */
+static void transform_SWZ(struct radeon_compiler* c,
+       struct rc_instruction* inst)
+{
+       inst->U.I.Opcode = RC_OPCODE_MOV;
+}
+
+/**
+ * XPD dst, a, b (cross product)
+ *   ->  MUL tmp, a.zxyw, b.yzxw
+ *       MAD dst, a.yzxw, b.zxyw, -tmp
+ */
+static void transform_XPD(struct radeon_compiler* c,
+       struct rc_instruction* inst)
+{
+       struct rc_dst_register prod_dst = try_to_reuse_dst(c, inst);
+       struct rc_src_register a = inst->U.I.SrcReg[0];
+       struct rc_src_register b = inst->U.I.SrcReg[1];
+       struct rc_src_register prod = srcreg(RC_FILE_TEMPORARY, prod_dst.Index);
+
+       emit2(c, inst->Prev, RC_OPCODE_MUL, 0, prod_dst,
+               swizzle(a, RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_W),
+               swizzle(b, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_W));
+       emit3(c, inst->Prev, RC_OPCODE_MAD, inst->U.I.SaturateMode, inst->U.I.DstReg,
+               swizzle(a, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_W),
+               swizzle(b, RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_W),
+               negate(prod));
+
+       rc_remove_instruction(inst);
+}
+
+
+/**
+ * Transformation callback for @ref rc_local_transform; no userData
+ * necessary.
+ *
+ * Eliminates the following ALU instructions:
+ *  ABS, CEIL, CLAMP, DP2, DPH, DST, FLR, LIT, LRP, POW, SEQ, SFL, SGE, SGT,
+ *  SLE, SLT, SNE, SSG, SUB, SWZ, XPD
+ * using:
+ *  MOV, ADD, MUL, MAD, FRC, MIN, MAX, DP3, DP4, LG2, EX2, CMP
+ *
+ * RSQ is kept, but its operand is explicitly given the absolute-value
+ * modifier.
+ *
+ * @return 1 if the instruction was handled, 0 if its opcode is not one
+ *         of the above.
+ *
+ * @note should be applicable to R300 and R500 fragment programs.
+ */
+int radeonTransformALU(
+       struct radeon_compiler * c,
+       struct rc_instruction* inst,
+       void* unused)
+{
+       switch(inst->U.I.Opcode) {
+       case RC_OPCODE_ABS: transform_ABS(c, inst); break;
+       case RC_OPCODE_CEIL: transform_CEIL(c, inst); break;
+       case RC_OPCODE_CLAMP: transform_CLAMP(c, inst); break;
+       case RC_OPCODE_DP2: transform_DP2(c, inst); break;
+       case RC_OPCODE_DPH: transform_DPH(c, inst); break;
+       case RC_OPCODE_DST: transform_DST(c, inst); break;
+       case RC_OPCODE_FLR: transform_FLR(c, inst); break;
+       case RC_OPCODE_LIT: transform_LIT(c, inst); break;
+       case RC_OPCODE_LRP: transform_LRP(c, inst); break;
+       case RC_OPCODE_POW: transform_POW(c, inst); break;
+       case RC_OPCODE_RSQ: transform_RSQ(c, inst); break;
+       case RC_OPCODE_SEQ: transform_SEQ(c, inst); break;
+       case RC_OPCODE_SFL: transform_SFL(c, inst); break;
+       case RC_OPCODE_SGE: transform_SGE(c, inst); break;
+       case RC_OPCODE_SGT: transform_SGT(c, inst); break;
+       case RC_OPCODE_SLE: transform_SLE(c, inst); break;
+       case RC_OPCODE_SLT: transform_SLT(c, inst); break;
+       case RC_OPCODE_SNE: transform_SNE(c, inst); break;
+       case RC_OPCODE_SSG: transform_SSG(c, inst); break;
+       case RC_OPCODE_SUB: transform_SUB(c, inst); break;
+       case RC_OPCODE_SWZ: transform_SWZ(c, inst); break;
+       case RC_OPCODE_XPD: transform_XPD(c, inst); break;
+       default:
+               /* not an opcode this pass knows how to lower */
+               return 0;
+       }
+
+       return 1;
+}
+
+
+/** ABS dst, x  ->  MAX dst, x, -x   (rewritten in place) */
+static void transform_r300_vertex_ABS(struct radeon_compiler* c,
+       struct rc_instruction* inst)
+{
+       /* Note: r500 can take absolute values, but r300 cannot. */
+       inst->U.I.SrcReg[1] = negate(inst->U.I.SrcReg[0]);
+       inst->U.I.Opcode = RC_OPCODE_MAX;
+}
+
+/**
+ * Lower CMP for the vertex engine as SLT followed by LRP; the LRP is
+ * lowered immediately by transform_LRP.
+ */
+static void transform_r300_vertex_CMP(struct radeon_compiler* c,
+       struct rc_instruction* inst)
+{
+       /* There is no decent CMP available, so let's rig one up.
+        * CMP is defined as dst = src0 < 0.0 ? src1 : src2
+        * The following sequence consumes zero to two temps and two extra slots
+        * (the second temp and the second slot is consumed by transform_LRP),
+        * but should be equivalent:
+        *
+        * SLT tmp0, src0, 0.0
+        * LRP dst, tmp0, src1, src2
+        *
+        * Yes, I know, I'm a mad scientist. ~ C. & M. */
+       struct rc_dst_register dst = try_to_reuse_dst(c, inst);
+
+       /* SLT tmp0, src0, 0.0 */
+       emit2(c, inst->Prev, RC_OPCODE_SLT, 0,
+               dst,
+               inst->U.I.SrcReg[0], builtin_zero);
+
+       /* LRP dst, tmp0, src1, src2 */
+       /* The LRP is emitted only to be handed to transform_LRP, which
+        * replaces it with its own lowered sequence and removes it. */
+       transform_LRP(c,
+               emit3(c, inst->Prev, RC_OPCODE_LRP, 0,
+                     inst->U.I.DstReg,
+                     srcreg(RC_FILE_TEMPORARY, dst.Index), inst->U.I.SrcReg[1],  inst->U.I.SrcReg[2]));
+
+       rc_remove_instruction(inst);
+}
+
+/**
+ * Lower DP2 for the vertex engine: reuse transform_DP2, then turn the DP3
+ * it produced into a DP4 (the Z and W channels of both operands have been
+ * forced to zero by transform_DP2).
+ */
+static void transform_r300_vertex_DP2(struct radeon_compiler* c,
+       struct rc_instruction* inst)
+{
+       /* transform_DP2 removes inst, so remember its successor first;
+        * the replacement instruction then sits right before it. */
+       struct rc_instruction *next_inst = inst->Next;
+       transform_DP2(c, inst);
+       next_inst->Prev->U.I.Opcode = RC_OPCODE_DP4;
+}
+
+/**
+ * DP3 dst, a, b  ->  DP4 dst, a.xyz0, b.xyz0
+ *
+ * The W channel of both operands is forced to the constant zero.
+ */
+static void transform_r300_vertex_DP3(struct radeon_compiler* c,
+       struct rc_instruction* inst)
+{
+       struct rc_src_register lhs = inst->U.I.SrcReg[0];
+       struct rc_src_register rhs = inst->U.I.SrcReg[1];
+
+       /* The W swizzle field is the 3 bits starting at bit 3*3. */
+       lhs.Swizzle = (lhs.Swizzle & ~(7 << (3 * 3))) | (RC_SWIZZLE_ZERO << (3 * 3));
+       lhs.Negate &= ~RC_MASK_W;
+
+       rhs.Swizzle = (rhs.Swizzle & ~(7 << (3 * 3))) | (RC_SWIZZLE_ZERO << (3 * 3));
+       rhs.Negate &= ~RC_MASK_W;
+
+       emit2(c, inst->Prev, RC_OPCODE_DP4, inst->U.I.SaturateMode, inst->U.I.DstReg,
+               lhs, rhs);
+       rc_remove_instruction(inst);
+}
+
+/**
+ * Pre-process the operand of a vertex LIT: copy it to a temporary, raise
+ * the Y channel of the copy to at least a tiny positive constant, and make
+ * the LIT (which is kept) read the copy instead.
+ *
+ * The copy is written with the full XYZW mask. The destination register of
+ * the LIT is therefore only reused for it when the LIT itself writes all
+ * four channels; otherwise the copy would clobber channels outside the
+ * LIT's write mask, so a free temporary is used instead.
+ */
+static void transform_r300_vertex_fix_LIT(struct radeon_compiler* c,
+       struct rc_instruction* inst)
+{
+       struct rc_dst_register dst = try_to_reuse_dst(c, inst);
+       unsigned constant_swizzle;
+       int constant = rc_constants_add_immediate_scalar(&c->Program.Constants,
+                                                        0.0000000000000000001,
+                                                        &constant_swizzle);
+
+       /* Only a fully overwritten destination may hold the XYZW copy. */
+       if (inst->U.I.DstReg.WriteMask != RC_MASK_XYZW)
+               dst.Index = rc_find_free_temporary(c);
+
+       /* MOV dst, src */
+       dst.WriteMask = RC_MASK_XYZW;
+       emit1(c, inst->Prev, RC_OPCODE_MOV, 0,
+               dst,
+               inst->U.I.SrcReg[0]);
+
+       /* MAX dst.y, src, 0.00...001 */
+       emit2(c, inst->Prev, RC_OPCODE_MAX, 0,
+               dstregtmpmask(dst.Index, RC_MASK_Y),
+               srcreg(RC_FILE_TEMPORARY, dst.Index),
+               srcregswz(RC_FILE_CONSTANT, constant, constant_swizzle));
+
+       /* The LIT stays in the program and now reads the adjusted copy. */
+       inst->U.I.SrcReg[0] = srcreg(RC_FILE_TEMPORARY, dst.Index);
+}
+
+/**
+ * SEQ dst, x, y  ->  SGE tmp, x, y ; SGE dst, y, x ; MUL dst, tmp, dst
+ *
+ * Note that the final MUL reads the destination register back as a source.
+ */
+static void transform_r300_vertex_SEQ(struct radeon_compiler *c,
+       struct rc_instruction *inst)
+{
+       /* x = y  <==>  x >= y && y >= x */
+       int tmp = rc_find_free_temporary(c);
+
+       /* x >= y */
+       emit2(c, inst->Prev, RC_OPCODE_SGE, 0,
+             dstregtmpmask(tmp, inst->U.I.DstReg.WriteMask),
+             inst->U.I.SrcReg[0],
+             inst->U.I.SrcReg[1]);
+
+       /* y >= x */
+       emit2(c, inst->Prev, RC_OPCODE_SGE, 0,
+             inst->U.I.DstReg,
+             inst->U.I.SrcReg[1],
+             inst->U.I.SrcReg[0]);
+
+       /* x && y  =  x * y */
+       emit2(c, inst->Prev, RC_OPCODE_MUL, 0,
+             inst->U.I.DstReg,
+             srcreg(RC_FILE_TEMPORARY, tmp),
+             srcreg(inst->U.I.DstReg.File, inst->U.I.DstReg.Index));
+
+       rc_remove_instruction(inst);
+}
+
+/**
+ * SNE dst, x, y  ->  SLT tmp, x, y ; SLT dst, y, x ; MAX dst, tmp, dst
+ *
+ * Note that the final MAX reads the destination register back as a source.
+ */
+static void transform_r300_vertex_SNE(struct radeon_compiler *c,
+       struct rc_instruction *inst)
+{
+       /* x != y  <==>  x < y || y < x */
+       int tmp = rc_find_free_temporary(c);
+
+       /* x < y */
+       emit2(c, inst->Prev, RC_OPCODE_SLT, 0,
+             dstregtmpmask(tmp, inst->U.I.DstReg.WriteMask),
+             inst->U.I.SrcReg[0],
+             inst->U.I.SrcReg[1]);
+
+       /* y < x */
+       emit2(c, inst->Prev, RC_OPCODE_SLT, 0,
+             inst->U.I.DstReg,
+             inst->U.I.SrcReg[1],
+             inst->U.I.SrcReg[0]);
+
+       /* x || y  =  max(x, y) */
+       emit2(c, inst->Prev, RC_OPCODE_MAX, 0,
+             inst->U.I.DstReg,
+             srcreg(RC_FILE_TEMPORARY, tmp),
+             srcreg(inst->U.I.DstReg.File, inst->U.I.DstReg.Index));
+
+       rc_remove_instruction(inst);
+}
+
+/** SGT dst, x, y  ->  SLT dst, -x, -y   (rewritten in place) */
+static void transform_r300_vertex_SGT(struct radeon_compiler* c,
+       struct rc_instruction* inst)
+{
+       /* x > y  <==>  -x < -y */
+       inst->U.I.SrcReg[0] = negate(inst->U.I.SrcReg[0]);
+       inst->U.I.SrcReg[1] = negate(inst->U.I.SrcReg[1]);
+       inst->U.I.Opcode = RC_OPCODE_SLT;
+}
+
+/** SLE dst, x, y  ->  SGE dst, -x, -y   (rewritten in place) */
+static void transform_r300_vertex_SLE(struct radeon_compiler* c,
+       struct rc_instruction* inst)
+{
+       /* x <= y  <==>  -x >= -y */
+       inst->U.I.SrcReg[0] = negate(inst->U.I.SrcReg[0]);
+       inst->U.I.SrcReg[1] = negate(inst->U.I.SrcReg[1]);
+       inst->U.I.Opcode = RC_OPCODE_SGE;
+}
+
+/**
+ * SSG dst, x  (sign of x) for the vertex engine.
+ *
+ * Lowered to two SLTs producing 0/1 flags and one ADD combining them.
+ */
+static void transform_r300_vertex_SSG(struct radeon_compiler* c,
+       struct rc_instruction* inst)
+{
+       /* result = sign(x)
+        *
+        *   SLT tmp0, 0, x;
+        *   SLT tmp1, x, 0;
+        *   ADD result, tmp0, -tmp1;
+        */
+       struct rc_dst_register dst0;
+       unsigned tmp1;
+
+       /* 0 < x */
+       dst0 = try_to_reuse_dst(c, inst);
+       emit2(c, inst->Prev, RC_OPCODE_SLT, 0,
+             dst0,
+             builtin_zero,
+             inst->U.I.SrcReg[0]);
+
+       /* x < 0 */
+       /* Looked up only after the first SLT has been emitted, so that
+        * the scratch register it writes is already in use by then. */
+       tmp1 = rc_find_free_temporary(c);
+       emit2(c, inst->Prev, RC_OPCODE_SLT, 0,
+             dstregtmpmask(tmp1, inst->U.I.DstReg.WriteMask),
+             inst->U.I.SrcReg[0],
+             builtin_zero);
+
+       /* Either both are zero, or one of them is one and the other is zero. */
+       /* result = tmp0 - tmp1 */
+       emit2(c, inst->Prev, RC_OPCODE_ADD, 0,
+             inst->U.I.DstReg,
+             srcreg(RC_FILE_TEMPORARY, dst0.Index),
+             negate(srcreg(RC_FILE_TEMPORARY, tmp1)));
+
+       rc_remove_instruction(inst);
+}
+
+/**
+ * Transformation callback for rc_local_transform that lowers the ALU
+ * instructions the r300 up to r500 vertex engine has no native form for.
+ *
+ * SEQ and SNE are only lowered when the compiler is not targeting r500.
+ *
+ * @return 1 if the instruction was handled, 0 otherwise.
+ */
+int r300_transform_vertex_alu(
+       struct radeon_compiler * c,
+       struct rc_instruction* inst,
+       void* unused)
+{
+       switch(inst->U.I.Opcode) {
+       case RC_OPCODE_ABS: transform_r300_vertex_ABS(c, inst); break;
+       case RC_OPCODE_CEIL: transform_CEIL(c, inst); break;
+       case RC_OPCODE_CLAMP: transform_CLAMP(c, inst); break;
+       case RC_OPCODE_CMP: transform_r300_vertex_CMP(c, inst); break;
+       case RC_OPCODE_DP2: transform_r300_vertex_DP2(c, inst); break;
+       case RC_OPCODE_DP3: transform_r300_vertex_DP3(c, inst); break;
+       case RC_OPCODE_DPH: transform_DPH(c, inst); break;
+       case RC_OPCODE_FLR: transform_FLR(c, inst); break;
+       case RC_OPCODE_LIT: transform_r300_vertex_fix_LIT(c, inst); break;
+       case RC_OPCODE_LRP: transform_LRP(c, inst); break;
+       case RC_OPCODE_SEQ:
+               if (c->is_r500)
+                       return 0;
+               transform_r300_vertex_SEQ(c, inst);
+               break;
+       case RC_OPCODE_SFL: transform_SFL(c, inst); break;
+       case RC_OPCODE_SGT: transform_r300_vertex_SGT(c, inst); break;
+       case RC_OPCODE_SLE: transform_r300_vertex_SLE(c, inst); break;
+       case RC_OPCODE_SNE:
+               if (c->is_r500)
+                       return 0;
+               transform_r300_vertex_SNE(c, inst);
+               break;
+       case RC_OPCODE_SSG: transform_r300_vertex_SSG(c, inst); break;
+       case RC_OPCODE_SUB: transform_SUB(c, inst); break;
+       case RC_OPCODE_SWZ: transform_SWZ(c, inst); break;
+       case RC_OPCODE_XPD: transform_XPD(c, inst); break;
+       default:
+               /* not an opcode this pass knows how to lower */
+               return 0;
+       }
+
+       return 1;
+}
+
+/**
+ * Register the two immediate vec4 constants used by the sin/cos lowering
+ * and store their constant indices in constants[0] and constants[1].
+ */
+static void sincos_constants(struct radeon_compiler* c, unsigned int *constants)
+{
+       static const float SinCosConsts[2][4] = {
+               {
+                       1.273239545,            /* 4/PI */
+                       -0.405284735,           /* -4/(PI*PI) */
+                       3.141592654,            /* PI */
+                       0.2225                  /* weight */
+               },
+               {
+                       0.75,
+                       0.5,
+                       0.159154943,            /* 1/(2*PI) */
+                       6.283185307             /* 2*PI */
+               }
+       };
+       unsigned int row = 0;
+
+       while (row < 2) {
+               constants[row] = rc_constants_add_immediate_vec4(&c->Program.Constants, SinCosConsts[row]);
+               row++;
+       }
+}
+
+/**
+ * Approximate sin(x), where x is clamped to (-pi/2, pi/2).
+ *
+ * Emitted before \p inst, using one scratch temporary:
+ *
+ * MUL tmp.xy, src, { 4/PI, -4/(PI^2) }
+ * MAD tmp.x, tmp.y, |src|, tmp.x
+ * MAD tmp.y, tmp.x, |tmp.x|, -tmp.x
+ * MAD dest, tmp.y, weight, tmp.x
+ *
+ * Only the X channel of \p src is read. \p constants must hold the indices
+ * filled in by sincos_constants.
+ */
+static void sin_approx(
+       struct radeon_compiler* c, struct rc_instruction * inst,
+       struct rc_dst_register dst, struct rc_src_register src, const unsigned int* constants)
+{
+       unsigned int tempreg = rc_find_free_temporary(c);
+       struct rc_src_register tmp = srcreg(RC_FILE_TEMPORARY, tempreg);
+       struct rc_src_register tmp_x = swizzle_xxxx(tmp);
+       struct rc_src_register tmp_y = swizzle_yyyy(tmp);
+       struct rc_src_register coeffs = srcreg(RC_FILE_CONSTANT, constants[0]);
+       struct rc_src_register src_x = swizzle_xxxx(src);
+
+       emit2(c, inst->Prev, RC_OPCODE_MUL, 0, dstregtmpmask(tempreg, RC_MASK_XY),
+               src_x, coeffs);
+       emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_X),
+               tmp_y, absolute(src_x), tmp_x);
+       emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_Y),
+               tmp_x, absolute(tmp_x), negate(tmp_x));
+       /* the weight is the W component of the first constant vector */
+       emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dst,
+               tmp_y, swizzle_wwww(coeffs), tmp_x);
+}
+
+/**
+ * Lower the trigonometric opcodes COS, SIN and SCS to basic arithmetic
+ * (MUL, MAD, FRC).
+ *
+ * The argument is first wrapped with
+ *   frac(x * 1/(2*PI) + bias) * 2*PI - PI
+ * where bias is 0.5 for the sine and 0.75 for the cosine, and the result is
+ * handed to sin_approx().
+ *
+ * Returns 0 and leaves inst alone when it is none of the three opcodes.
+ * Otherwise the replacement is emitted in front of inst, inst is removed
+ * and 1 is returned.
+ */
+int r300_transform_trig_simple(struct radeon_compiler* c,
+       struct rc_instruction* inst,
+       void* unused)
+{
+       const unsigned int op = inst->U.I.Opcode;
+       unsigned int constants[2];
+       unsigned int tempreg;
+       struct rc_src_register tmp;
+       struct rc_src_register poly;    /* { 4/PI, -4/(PI*PI), PI, weight } */
+       struct rc_src_register range;   /* { 0.75, 0.5, 1/(2*PI), 2*PI } */
+
+       switch (op) {
+       case RC_OPCODE_COS:
+       case RC_OPCODE_SIN:
+       case RC_OPCODE_SCS:
+               break;
+       default:
+               return 0;
+       }
+
+       tempreg = rc_find_free_temporary(c);
+
+       sincos_constants(c, constants);
+
+       tmp = srcreg(RC_FILE_TEMPORARY, tempreg);
+       poly = srcreg(RC_FILE_CONSTANT, constants[0]);
+       range = srcreg(RC_FILE_CONSTANT, constants[1]);
+
+       if (op != RC_OPCODE_SCS) {
+               /* COS and SIN share one code path; only the bias differs
+                * (0.75 for COS, 0.5 for SIN). */
+               struct rc_src_register bias = (op == RC_OPCODE_COS)
+                       ? swizzle_xxxx(range) : swizzle_yyyy(range);
+
+               /* MAD tmp.w, src.x, 1/(2*PI), bias */
+               emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W),
+                       swizzle_xxxx(inst->U.I.SrcReg[0]),
+                       swizzle_zzzz(range),
+                       bias);
+               /* FRC tmp.w, tmp.w */
+               emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(tempreg, RC_MASK_W),
+                       swizzle_wwww(tmp));
+               /* MAD tmp.w, tmp.w, 2*PI, -PI */
+               emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W),
+                       swizzle_wwww(tmp),
+                       swizzle_wwww(range),
+                       negate(swizzle_zzzz(poly)));
+
+               sin_approx(c, inst, inst->U.I.DstReg,
+                       swizzle_wwww(tmp),
+                       constants);
+       } else {
+               /* SCS: tmp.x carries the wrapped cosine argument and tmp.y
+                * the wrapped sine argument. */
+               struct rc_dst_register dst;
+
+               /* MAD tmp.xy, src.x, 1/(2*PI), { 0.75, 0.5 } */
+               emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_XY),
+                       swizzle_xxxx(inst->U.I.SrcReg[0]),
+                       swizzle_zzzz(range),
+                       swizzle(range, RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_W));
+               /* FRC tmp.xy, tmp */
+               emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(tempreg, RC_MASK_XY),
+                       tmp);
+               /* MAD tmp.xy, tmp, 2*PI, -PI */
+               emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_XY),
+                       tmp,
+                       swizzle_wwww(range),
+                       negate(swizzle_zzzz(poly)));
+
+               dst = inst->U.I.DstReg;
+
+               /* Result for the X component of the destination. */
+               dst.WriteMask = inst->U.I.DstReg.WriteMask & RC_MASK_X;
+               sin_approx(c, inst, dst,
+                       swizzle_xxxx(tmp),
+                       constants);
+
+               /* Result for the Y component of the destination. */
+               dst.WriteMask = inst->U.I.DstReg.WriteMask & RC_MASK_Y;
+               sin_approx(c, inst, dst,
+                       swizzle_yyyy(tmp),
+                       constants);
+       }
+
+       rc_remove_instruction(inst);
+
+       return 1;
+}
+
+/**
+ * Replace inst (COS, SIN or SCS) with COS/SIN instructions that read their
+ * argument from the W component of temporary srctmp, then remove inst.
+ *
+ * SCS becomes a COS writing X and/or a SIN writing Y, depending on which of
+ * these two components the original write mask enables. The saturate mode
+ * and destination register of inst are carried over.
+ */
+static void r300_transform_SIN_COS_SCS(struct radeon_compiler *c,
+       struct rc_instruction *inst,
+       unsigned srctmp)
+{
+       struct rc_dst_register moddst;
+
+       switch (inst->U.I.Opcode) {
+       case RC_OPCODE_COS:
+               emit1(c, inst->Prev, RC_OPCODE_COS, inst->U.I.SaturateMode, inst->U.I.DstReg,
+                       srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW));
+               break;
+
+       case RC_OPCODE_SIN:
+               emit1(c, inst->Prev, RC_OPCODE_SIN, inst->U.I.SaturateMode, inst->U.I.DstReg,
+                       srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW));
+               break;
+
+       case RC_OPCODE_SCS:
+               moddst = inst->U.I.DstReg;
+
+               /* Cosine goes to X ... */
+               if (inst->U.I.DstReg.WriteMask & RC_MASK_X) {
+                       moddst.WriteMask = RC_MASK_X;
+                       emit1(c, inst->Prev, RC_OPCODE_COS, inst->U.I.SaturateMode, moddst,
+                               srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW));
+               }
+               /* ... and sine to Y. */
+               if (inst->U.I.DstReg.WriteMask & RC_MASK_Y) {
+                       moddst.WriteMask = RC_MASK_Y;
+                       emit1(c, inst->Prev, RC_OPCODE_SIN, inst->U.I.SaturateMode, moddst,
+                               srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW));
+               }
+               break;
+
+       default:
+               /* Any other opcode emits nothing, but is still removed below. */
+               break;
+       }
+
+       rc_remove_instruction(inst);
+}
+
+
+/**
+ * Rewrite COS, SIN and SCS so that their argument is pre-scaled by 1/(2*PI)
+ * and reduced to its fractional part; the input of the emitted COS and SIN
+ * is therefore always in the range [0,1).
+ * SCS is replaced by one COS and one SIN instruction.
+ *
+ * Returns 0 for any other opcode, 1 after a replacement.
+ *
+ * @warning This transformation implicitly changes the semantics of SIN and COS!
+ */
+int radeonTransformTrigScale(struct radeon_compiler* c,
+       struct rc_instruction* inst,
+       void* unused)
+{
+       static const float RCP_2PI = 0.15915494309189535;
+       unsigned int scaled;            /* temporary whose .w holds the scaled argument */
+       unsigned int rcp_const;
+       unsigned int rcp_swizzle;
+
+       switch (inst->U.I.Opcode) {
+       case RC_OPCODE_COS:
+       case RC_OPCODE_SIN:
+       case RC_OPCODE_SCS:
+               break;
+       default:
+               return 0;
+       }
+
+       scaled = rc_find_free_temporary(c);
+       rcp_const = rc_constants_add_immediate_scalar(&c->Program.Constants, RCP_2PI, &rcp_swizzle);
+
+       /* MUL scaled.w, src.x, 1/(2*PI) */
+       emit2(c, inst->Prev, RC_OPCODE_MUL, 0, dstregtmpmask(scaled, RC_MASK_W),
+               swizzle_xxxx(inst->U.I.SrcReg[0]),
+               srcregswz(RC_FILE_CONSTANT, rcp_const, rcp_swizzle));
+       /* FRC scaled.w, scaled */
+       emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(scaled, RC_MASK_W),
+               srcreg(RC_FILE_TEMPORARY, scaled));
+
+       r300_transform_SIN_COS_SCS(c, inst, scaled);
+       return 1;
+}
+
+/**
+ * Rewrite COS, SIN and SCS so that the argument of the emitted COS and SIN
+ * is always in the range [-PI, PI], using
+ *
+ *   repeat(x) = frac(x / 2PI + 0.5) * 2PI - PI
+ *
+ * SCS is replaced by one COS and one SIN instruction.
+ * Returns 0 for any other opcode, 1 after a replacement.
+ */
+int r300_transform_trig_scale_vertex(struct radeon_compiler *c,
+       struct rc_instruction *inst,
+       void *unused)
+{
+       /* { 1/(2*PI), 0.5, 2*PI, -PI } */
+       static const float cons[4] = {0.15915494309189535, 0.5, 6.28318530717959, -3.14159265358979};
+       unsigned int wrapped;           /* temporary whose .w holds repeat(x) */
+       unsigned int k;
+
+       switch (inst->U.I.Opcode) {
+       case RC_OPCODE_COS:
+       case RC_OPCODE_SIN:
+       case RC_OPCODE_SCS:
+               break;
+       default:
+               return 0;
+       }
+
+       wrapped = rc_find_free_temporary(c);
+       k = rc_constants_add_immediate_vec4(&c->Program.Constants, cons);
+
+       /* wrapped.w = x * 1/(2*PI) + 0.5 */
+       emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(wrapped, RC_MASK_W),
+               swizzle_xxxx(inst->U.I.SrcReg[0]),
+               srcregswz(RC_FILE_CONSTANT, k, RC_SWIZZLE_XXXX),
+               srcregswz(RC_FILE_CONSTANT, k, RC_SWIZZLE_YYYY));
+       /* wrapped.w = frac(wrapped) */
+       emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(wrapped, RC_MASK_W),
+               srcreg(RC_FILE_TEMPORARY, wrapped));
+       /* wrapped.w = wrapped * 2*PI - PI */
+       emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(wrapped, RC_MASK_W),
+               srcreg(RC_FILE_TEMPORARY, wrapped),
+               srcregswz(RC_FILE_CONSTANT, k, RC_SWIZZLE_ZZZZ),
+               srcregswz(RC_FILE_CONSTANT, k, RC_SWIZZLE_WWWW));
+
+       r300_transform_SIN_COS_SCS(c, inst, wrapped);
+       return 1;
+}
+
+/**
+ * Adjust DDX/DDY instructions for r5xx shaders.
+ *
+ * The r5xx MDH/MDV instruction provides per-quad partial derivatives and
+ * has the form A*B+C. A and C are set by setting src0; B should be -1, so
+ * the second source is forced to the constant swizzle 1111 with all four
+ * components negated.
+ *
+ * Returns 1 when inst is DDX or DDY and was adjusted in place, 0 otherwise.
+ *
+ * @warning This explicitly changes the form of DDX and DDY!
+ */
+
+int radeonTransformDeriv(struct radeon_compiler* c,
+       struct rc_instruction* inst,
+       void* unused)
+{
+       const unsigned int op = inst->U.I.Opcode;
+
+       if (op == RC_OPCODE_DDX || op == RC_OPCODE_DDY) {
+               /* src1 = -1 in every component */
+               inst->U.I.SrcReg[1].Negate = RC_MASK_XYZW;
+               inst->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_1111;
+               return 1;
+       }
+
+       return 0;
+}
+
+/**
+ * Turn every KILP into a KIL.
+ *
+ * IF Temp[0].x -\
+ * KILP         - > KIL -abs(Temp[0].x)
+ * ENDIF        -/
+ *
+ * A KILP that is not directly enclosed by IF/ENDIF becomes KIL with the
+ * source -1 (negated builtin_one).
+ *
+ * This needs to be done in its own pass, because it modifies the instructions
+ * before and after KILP.
+ */
+void rc_transform_KILP(struct radeon_compiler * c, void *user)
+{
+       struct rc_instruction * const list_head = &c->Program.Instructions;
+       struct rc_instruction * cur = list_head->Next;
+
+       while (cur != list_head) {
+               if (cur->U.I.Opcode == RC_OPCODE_KILP) {
+                       cur->U.I.Opcode = RC_OPCODE_KIL;
+
+                       /* NOTE(review): for the first/last instruction Prev/Next
+                        * presumably is the list head itself, whose opcode is
+                        * then inspected here - confirm that is harmless. */
+                       if (cur->Prev->U.I.Opcode == RC_OPCODE_IF
+                                       && cur->Next->U.I.Opcode == RC_OPCODE_ENDIF) {
+                               /* Fold the IF condition into the KIL source
+                                * before the IF goes away. */
+                               cur->U.I.SrcReg[0] =
+                                       negate(absolute(cur->Prev->U.I.SrcReg[0]));
+                               /* Remove IF */
+                               rc_remove_instruction(cur->Prev);
+                               /* Remove ENDIF */
+                               rc_remove_instruction(cur->Next);
+                       } else {
+                               cur->U.I.SrcReg[0] = negate(builtin_one);
+                       }
+               }
+               cur = cur->Next;
+       }
+}
diff --git a/src/gallium/drivers/r300/compiler/radeon_program_alu.h b/src/gallium/drivers/r300/compiler/radeon_program_alu.h
new file mode 100644 (file)
index 0000000..b5f361e
--- /dev/null
@@ -0,0 +1,66 @@
+/*
+ * Copyright (C) 2008 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __RADEON_PROGRAM_ALU_H_
+#define __RADEON_PROGRAM_ALU_H_
+
+#include "radeon_program.h"
+
+/*
+ * Per-instruction transformation callbacks. Each one receives the
+ * compiler, the instruction to examine and an opaque user pointer.
+ * The trig and derivative callbacks return 1 when they handled the
+ * instruction and 0 when they left it untouched; presumably
+ * radeonTransformALU follows the same convention - TODO confirm.
+ */
+int radeonTransformALU(
+       struct radeon_compiler * c,
+       struct rc_instruction * inst,
+       void*);
+
+/* ALU lowering for vertex programs. */
+int r300_transform_vertex_alu(
+       struct radeon_compiler * c,
+       struct rc_instruction * inst,
+       void*);
+
+/* Lowers COS/SIN/SCS to MUL/MAD/FRC using a polynomial approximation. */
+int r300_transform_trig_simple(
+       struct radeon_compiler * c,
+       struct rc_instruction * inst,
+       void*);
+
+/* Pre-scales the argument of COS/SIN/SCS by 1/(2*PI) and takes its
+ * fractional part; SCS is split into one COS and one SIN. */
+int radeonTransformTrigScale(
+       struct radeon_compiler * c,
+       struct rc_instruction * inst,
+       void*);
+
+/* Wraps the argument of COS/SIN/SCS into [-PI, PI]; SCS is split into
+ * one COS and one SIN. */
+int r300_transform_trig_scale_vertex(
+       struct radeon_compiler *c,
+       struct rc_instruction *inst,
+       void*);
+
+/* Sets the second source of DDX/DDY to -1 as needed by r5xx. */
+int radeonTransformDeriv(
+       struct radeon_compiler * c,
+       struct rc_instruction * inst,
+       void*);
+
+/* Whole-program pass (not a per-instruction callback): rewrites every
+ * KILP into a KIL, folding a directly enclosing IF/ENDIF pair into it. */
+void rc_transform_KILP(struct radeon_compiler * c,
+                      void *user);
+
+#endif /* __RADEON_PROGRAM_ALU_H_ */
diff --git a/src/gallium/drivers/r300/compiler/radeon_program_constants.h b/src/gallium/drivers/r300/compiler/radeon_program_constants.h
new file mode 100644 (file)
index 0000000..2457733
--- /dev/null
@@ -0,0 +1,190 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef RADEON_PROGRAM_CONSTANTS_H
+#define RADEON_PROGRAM_CONSTANTS_H
+
+/**
+ * Saturation applied to an instruction result.
+ * NOTE(review): judging by the names, ZERO_ONE clamps to [0, 1] and
+ * MINUS_PLUS_ONE to [-1, 1] - confirm against the emit code.
+ */
+typedef enum {
+       RC_SATURATE_NONE = 0,
+       RC_SATURATE_ZERO_ONE,
+       RC_SATURATE_MINUS_PLUS_ONE
+} rc_saturate_mode;
+
+/**
+ * Texture targets. The values are numbered implicitly in declaration
+ * order, starting with RC_TEXTURE_2D_ARRAY = 0.
+ */
+typedef enum {
+       RC_TEXTURE_2D_ARRAY,
+       RC_TEXTURE_1D_ARRAY,
+       RC_TEXTURE_CUBE,
+       RC_TEXTURE_3D,
+       RC_TEXTURE_RECT,
+       RC_TEXTURE_2D,
+       RC_TEXTURE_1D
+} rc_texture_target;
+
+/**
+ * Register files that a source or destination register can refer to.
+ */
+typedef enum {
+       /**
+        * Used to indicate unused register descriptions and
+        * source register that use a constant swizzle.
+        */
+       RC_FILE_NONE = 0,
+       RC_FILE_TEMPORARY,
+
+       /**
+        * Input register.
+        *
+        * \note The compiler attaches no implicit semantics to input registers.
+        * Fragment/vertex program specific semantics must be defined explicitly
+        * using the appropriate compiler interfaces.
+        */
+       RC_FILE_INPUT,
+
+       /**
+        * Output register.
+        *
+        * \note The compiler attaches no implicit semantics to output registers.
+        * Fragment/vertex program specific semantics must be defined explicitly
+        * using the appropriate compiler interfaces.
+        */
+       RC_FILE_OUTPUT,
+       RC_FILE_ADDRESS,
+
+       /**
+        * Indicates a constant from the \ref rc_constant_list .
+        */
+       RC_FILE_CONSTANT,
+
+       /**
+        * Indicates a special register, see RC_SPECIAL_xxx.
+        */
+       RC_FILE_SPECIAL,
+
+       /**
+        * Indicates this register should use the result of the presubtract
+        * operation.
+        */
+       RC_FILE_PRESUB
+} rc_register_file;
+
+/** Indices of the registers in the RC_FILE_SPECIAL register file. */
+enum {
+       /** R500 fragment program ALU result "register" */
+       RC_SPECIAL_ALU_RESULT = 0,
+
+       /** Must be last */
+       RC_NUM_SPECIAL_REGISTERS
+};
+
+/* Number of bits reserved for a register index. */
+#define RC_REGISTER_INDEX_BITS 10
+/* 1 << 10 = 1024. NOTE(review): despite the name this is one past the
+ * largest value that fits in RC_REGISTER_INDEX_BITS bits - confirm how
+ * users compare against it. */
+#define RC_REGISTER_MAX_INDEX (1 << RC_REGISTER_INDEX_BITS)
+
+/**
+ * Per-channel swizzle selector. Values 0-3 pick a component of the
+ * source register; values above 3 are constant swizzles (0, 1, 0.5) or
+ * mark the channel as unused. All values fit into three bits.
+ */
+typedef enum {
+       RC_SWIZZLE_X = 0,
+       RC_SWIZZLE_Y,
+       RC_SWIZZLE_Z,
+       RC_SWIZZLE_W,
+       RC_SWIZZLE_ZERO,
+       RC_SWIZZLE_ONE,
+       RC_SWIZZLE_HALF,
+       RC_SWIZZLE_UNUSED
+} rc_swizzle;
+
+/* A four-channel swizzle is packed three bits per channel: channel idx
+ * occupies bits [idx*3, idx*3 + 2]. */
+#define RC_MAKE_SWIZZLE(a,b,c,d) (((a)<<0) | ((b)<<3) | ((c)<<6) | ((d)<<9))
+/* The same selector replicated into all four channels. */
+#define RC_MAKE_SWIZZLE_SMEAR(a) RC_MAKE_SWIZZLE((a),(a),(a),(a))
+/* Extract the selector of channel idx from a packed swizzle. */
+#define GET_SWZ(swz, idx)      (((swz) >> ((idx)*3)) & 0x7)
+/* Extract bit idx of a component mask. */
+#define GET_BIT(msk, idx)      (((msk) >> (idx)) & 0x1)
+/* Overwrite the selector of channel idx in the lvalue swz with newv.
+ * newv is not masked, so it must already fit into three bits. */
+#define SET_SWZ(swz, idx, newv) \
+       do { \
+               (swz) = ((swz) & ~(7 << ((idx)*3))) | ((newv) << ((idx)*3)); \
+       } while(0)
+
+/* Frequently used packed swizzles. The suffix lists the selector of each
+ * channel in order: 0 = RC_SWIZZLE_ZERO, 1 = RC_SWIZZLE_ONE,
+ * H = RC_SWIZZLE_HALF, U = RC_SWIZZLE_UNUSED. */
+#define RC_SWIZZLE_XYZW RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_W)
+#define RC_SWIZZLE_XYZ0 RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_ZERO)
+#define RC_SWIZZLE_XYZZ RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_Z)
+#define RC_SWIZZLE_XXXX RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_X)
+#define RC_SWIZZLE_YYYY RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_Y)
+#define RC_SWIZZLE_ZZZZ RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_Z)
+#define RC_SWIZZLE_WWWW RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_W)
+#define RC_SWIZZLE_0000 RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_ZERO)
+#define RC_SWIZZLE_1111 RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_ONE)
+#define RC_SWIZZLE_HHHH RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_HALF)
+#define RC_SWIZZLE_UUUU RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_UNUSED)
+
+/**
+ * \name Bitmasks for components of vectors.
+ *
+ * Used for write masks, negation masks, etc.
+ * One bit per component: X is bit 0, Y bit 1, Z bit 2 and W bit 3.
+ */
+/*@{*/
+#define RC_MASK_NONE 0
+#define RC_MASK_X 1
+#define RC_MASK_Y 2
+#define RC_MASK_Z 4
+#define RC_MASK_W 8
+#define RC_MASK_XY (RC_MASK_X|RC_MASK_Y)
+#define RC_MASK_XYZ (RC_MASK_X|RC_MASK_Y|RC_MASK_Z)
+#define RC_MASK_XYW (RC_MASK_X|RC_MASK_Y|RC_MASK_W)
+#define RC_MASK_XYZW (RC_MASK_X|RC_MASK_Y|RC_MASK_Z|RC_MASK_W)
+/*@}*/
+
+/**
+ * NOTE(review): presumably selects which component, if any, is written
+ * to the ALU result special register (see RC_SPECIAL_ALU_RESULT) -
+ * confirm against the users of this enum.
+ */
+typedef enum {
+       RC_ALURESULT_NONE = 0,
+       RC_ALURESULT_X,
+       RC_ALURESULT_W
+} rc_write_aluresult;
+
+/**
+ * Presubtract operations. For registers in the RC_FILE_PRESUB file the
+ * register index stores one of these values.
+ */
+typedef enum {
+       RC_PRESUB_NONE = 0,
+
+       /** 1 - 2 * src0 */
+       RC_PRESUB_BIAS,
+
+       /** src1 - src0 */
+       RC_PRESUB_SUB,
+
+       /** src1 + src0 */
+       RC_PRESUB_ADD,
+
+       /** 1 - src0 */
+       RC_PRESUB_INV
+} rc_presubtract_op;
+
+/**
+ * Number of source registers read by the presubtract operation op:
+ * 1 for BIAS and INV, 2 for ADD and SUB, 0 for anything else.
+ */
+static inline int rc_presubtract_src_reg_count(rc_presubtract_op op){
+       if (op == RC_PRESUB_BIAS || op == RC_PRESUB_INV)
+               return 1;
+
+       if (op == RC_PRESUB_ADD || op == RC_PRESUB_SUB)
+               return 2;
+
+       return 0;
+}
+
+/* Bit flags telling whether a swizzle reads from the RGB and/or the
+ * alpha sources of a paired instruction; they are tested with bitwise
+ * AND. */
+#define RC_SOURCE_NONE  0x0
+#define RC_SOURCE_RGB   0x1
+#define RC_SOURCE_ALPHA 0x2
+
+#endif /* RADEON_PROGRAM_CONSTANTS_H */
diff --git a/src/gallium/drivers/r300/compiler/radeon_program_pair.c b/src/gallium/drivers/r300/compiler/radeon_program_pair.c
new file mode 100644 (file)
index 0000000..5231595
--- /dev/null
@@ -0,0 +1,239 @@
+/*
+ * Copyright (C) 2008-2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_program_pair.h"
+
+#include "radeon_compiler_util.h"
+
+#include <stdlib.h>
+
+/**
+ * Record (file, index) in slot `slot` of the source array `srcs`.
+ * When the slot is the presubtract slot, index holds the presubtract op
+ * and the regular slots read by that op are flagged as used as well.
+ */
+static void pair_install_source(struct rc_pair_instruction_source *srcs,
+       int slot, rc_register_file file, unsigned int index)
+{
+       srcs[slot].Used = 1;
+       srcs[slot].File = file;
+       srcs[slot].Index = index;
+
+       if (slot == RC_PAIR_PRESUB_SRC) {
+               /* For registers with the RC_FILE_PRESUB file,
+                * the index stores the presubtract op. */
+               int src_regs = rc_presubtract_src_reg_count(index);
+               int i;
+
+               for (i = 0; i < src_regs; i++)
+                       srcs[i].Used = 1;
+       }
+}
+
+/**
+ * Install the register (file, index) as a source of the RGB and/or alpha
+ * half of a paired instruction.
+ *
+ * Returns the source slot that now holds the register, or -1 if no slot
+ * was free anymore. A slot that already holds the same register is
+ * preferred over an empty one. If neither rgb nor alpha is requested, or
+ * file is RC_FILE_NONE, nothing is installed and 0 is returned.
+ */
+int rc_pair_alloc_source(struct rc_pair_instruction *pair,
+       unsigned int rgb, unsigned int alpha,
+       rc_register_file file, unsigned int index)
+{
+       int best = -1;
+       int best_quality = -1;
+       unsigned int rgb_conflicts = 0;
+       unsigned int alpha_conflicts = 0;
+       int slot;
+
+       if (file == RC_FILE_NONE || (!rgb && !alpha))
+               return 0;
+
+       /* Make sure only one presubtract operation is used per instruction. */
+       if (file == RC_FILE_PRESUB) {
+               if ((rgb && pair->RGB.Src[RC_PAIR_PRESUB_SRC].Used
+                               && index != pair->RGB.Src[RC_PAIR_PRESUB_SRC].Index)
+                   || (alpha && pair->Alpha.Src[RC_PAIR_PRESUB_SRC].Used
+                               && index != pair->Alpha.Src[RC_PAIR_PRESUB_SRC].Index)) {
+                       return -1;
+               }
+       }
+
+       /* Rate the three regular slots: a slot occupied by a different
+        * register is a conflict, a slot already holding this register
+        * scores one point per matching half. */
+       for (slot = 0; slot < 3; ++slot) {
+               int quality = 0;
+
+               if (rgb && pair->RGB.Src[slot].Used) {
+                       if (pair->RGB.Src[slot].File != file ||
+                           pair->RGB.Src[slot].Index != index) {
+                               rgb_conflicts++;
+                               continue;
+                       }
+                       quality++;
+               }
+               if (alpha && pair->Alpha.Src[slot].Used) {
+                       if (pair->Alpha.Src[slot].File != file ||
+                           pair->Alpha.Src[slot].Index != index) {
+                               alpha_conflicts++;
+                               continue;
+                       }
+                       quality++;
+               }
+               if (quality > best_quality) {
+                       best_quality = quality;
+                       best = slot;
+               }
+       }
+
+       if (file == RC_FILE_PRESUB) {
+               /* Presubtract results always live in their dedicated slot. */
+               best = RC_PAIR_PRESUB_SRC;
+       } else if (best < 0 || (rgb && rgb_conflicts > 2)
+                       || (alpha && alpha_conflicts > 2)) {
+               return -1;
+       }
+
+       /* best >= 0 */
+
+       if (rgb)
+               pair_install_source(pair->RGB.Src, best, file, index);
+       if (alpha)
+               pair_install_source(pair->Alpha.Src, best, file, index);
+
+       return best;
+}
+
+/**
+ * Invoke cb for the source slot(s) behind one instruction argument.
+ *
+ * swz is a representative swizzle of the argument: W selects the alpha
+ * sources, X/Y/Z the RGB sources, anything else is ignored. If src is the
+ * presubtract slot, cb is called for each regular slot read by the
+ * presubtract operation instead.
+ */
+static void pair_foreach_source_callback(
+       struct rc_pair_instruction * pair,
+       void * data,
+       rc_pair_foreach_src_fn cb,
+       unsigned int swz,
+       unsigned int src)
+{
+       struct rc_pair_instruction_source * srcs;
+       unsigned int src_count;
+       unsigned int n;
+
+       /* swz > 3 means that the swizzle is either not used, or a constant
+        * swizzle (e.g. 0, 1, 0.5). */
+       if (swz > 3)
+               return;
+
+       srcs = (swz == RC_SWIZZLE_W) ? pair->Alpha.Src : pair->RGB.Src;
+
+       if (src != RC_PAIR_PRESUB_SRC) {
+               cb(data, &srcs[src]);
+               return;
+       }
+
+       /* The index of the presubtract slot stores the presubtract op. */
+       src_count = rc_presubtract_src_reg_count(srcs[RC_PAIR_PRESUB_SRC].Index);
+       for (n = 0; n < src_count; n++) {
+               cb(data, &srcs[n]);
+       }
+}
+
+/**
+ * Invoke cb for every source slot read by the alpha half of a paired
+ * instruction. Channel 0 of each argument's swizzle decides which source
+ * array the slot is taken from.
+ */
+void rc_pair_foreach_source_that_alpha_reads(
+       struct rc_pair_instruction * pair,
+       void * data,
+       rc_pair_foreach_src_fn cb)
+{
+       const struct rc_opcode_info * opinfo =
+                               rc_get_opcode_info(pair->Alpha.Opcode);
+       unsigned int arg = 0;
+
+       while (arg < opinfo->NumSrcRegs) {
+               unsigned int swz = GET_SWZ(pair->Alpha.Arg[arg].Swizzle, 0);
+
+               pair_foreach_source_callback(pair, data, cb,
+                                       swz,
+                                       pair->Alpha.Arg[arg].Source);
+               arg++;
+       }
+}
+
+/**
+ * Invoke cb for every source slot read by the RGB half of a paired
+ * instruction.
+ *
+ * For each argument of the RGB opcode, the first channel whose swizzle
+ * selects a real component (X, Y, Z or W) decides which source array the
+ * slot is reported from. Arguments that use only constant or unused
+ * swizzles are skipped.
+ */
+void rc_pair_foreach_source_that_rgb_reads(
+       struct rc_pair_instruction * pair,
+       void * data,
+       rc_pair_foreach_src_fn cb)
+{
+       unsigned int i;
+       const struct rc_opcode_info * info =
+                               rc_get_opcode_info(pair->RGB.Opcode);
+       for(i = 0; i < info->NumSrcRegs; i++) {
+               unsigned int chan;
+               unsigned int swz = RC_SWIZZLE_UNUSED;
+               /* Find a swizzle that is either X,Y,Z,or W.  We assume here
+                * that if one channel swizzles X,Y, or Z, then none of the
+                * other channels swizzle W, and vice-versa. */
+               for(chan = 0; chan < 4; chan++) {
+                       swz = GET_SWZ(pair->RGB.Arg[i].Swizzle, chan);
+                       /* Stop at the first real component. Scanning on would
+                        * leave swz at channel 3's value, so a constant or
+                        * unused last channel would hide the source. */
+                       if(swz == RC_SWIZZLE_X || swz == RC_SWIZZLE_Y
+                       || swz == RC_SWIZZLE_Z || swz == RC_SWIZZLE_W)
+                               break;
+               }
+               pair_foreach_source_callback(pair, data, cb,
+                                       swz,
+                                       pair->RGB.Arg[i].Source);
+       }
+}
+
+/**
+ * Return the source slot that arg refers to, taken from the RGB sources
+ * when the argument's swizzle reads RGB, otherwise from the alpha sources
+ * when it reads alpha. Returns NULL when the swizzle reads neither.
+ */
+struct rc_pair_instruction_source * rc_pair_get_src(
+       struct rc_pair_instruction * pair_inst,
+       struct rc_pair_instruction_arg * arg)
+{
+       const unsigned int type = rc_source_type_swz(arg->Swizzle);
+
+       /* RGB takes precedence if both bits are set. */
+       if (type & RC_SOURCE_RGB)
+               return &pair_inst->RGB.Src[arg->Source];
+
+       if (type & RC_SOURCE_ALPHA)
+               return &pair_inst->Alpha.Src[arg->Source];
+
+       return NULL;
+}
+
+/**
+ * Return the slot number (0-2) of src within the RGB or alpha source
+ * array of pair_inst, or -1 if src is none of those six slots.
+ * Only the first three slots of each array are searched.
+ */
+int rc_pair_get_src_index(
+       struct rc_pair_instruction * pair_inst,
+       struct rc_pair_instruction_source * src)
+{
+       int slot = 0;
+
+       while (slot < 3) {
+               if (src == &pair_inst->RGB.Src[slot])
+                       return slot;
+               if (src == &pair_inst->Alpha.Src[slot])
+                       return slot;
+               slot++;
+       }
+
+       return -1;
+}
diff --git a/src/gallium/drivers/r300/compiler/radeon_program_pair.h b/src/gallium/drivers/r300/compiler/radeon_program_pair.h
new file mode 100644 (file)
index 0000000..a957ea9
--- /dev/null
@@ -0,0 +1,137 @@
+/*
+ * Copyright (C) 2008 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __RADEON_PROGRAM_PAIR_H_
+#define __RADEON_PROGRAM_PAIR_H_
+
+#include "radeon_code.h"
+#include "radeon_opcodes.h"
+#include "radeon_program_constants.h"
+
+struct radeon_compiler;
+
+
+/**
+ * \file
+ * Represents a paired ALU instruction, as found in R300 and R500
+ * fragment programs.
+ *
+ * Note that this representation is taking some liberties as far
+ * as register files are concerned, to allow separate register
+ * allocation.
+ *
+ * Also note that there are some subtleties in that the semantics
+ * of certain opcodes are implicitly changed in this representation;
+ * see \ref rc_pair_translate
+ */
+
+/* For rgb and alpha instructions when arg[n].Source = RC_PAIR_PRESUB_SRC, then
+ * the presubtract value will be used, and
+ * {RGB,Alpha}.Src[RC_PAIR_PRESUB_SRC].File will be set to RC_FILE_PRESUB.
+ */
+#define RC_PAIR_PRESUB_SRC 3
+
+/**
+ * One source slot of a paired sub-instruction (an entry of
+ * rc_pair_sub_instruction::Src).
+ */
+struct rc_pair_instruction_source {
+       unsigned int Used:1; /**< Non-zero when this slot is in use. */
+       unsigned int File:3; /**< Register file of the source. */
+       /** Register index.  For the RC_PAIR_PRESUB_SRC slot the dump code
+        * interprets this field as a presubtract operation instead. */
+       unsigned int Index:RC_REGISTER_INDEX_BITS;
+};
+
+/**
+ * One argument of a paired sub-instruction: which source slot it reads
+ * and how the value is swizzled / modified.
+ */
+struct rc_pair_instruction_arg {
+       /** Index into the Src[] table; RC_PAIR_PRESUB_SRC selects the
+        * presubtract result. */
+       unsigned int Source:2;
+       unsigned int Swizzle:12; /**< Per-channel swizzle, read with GET_SWZ(). */
+       unsigned int Abs:1; /**< Take the absolute value of the argument. */
+       unsigned int Negate:1; /**< Negate the argument. */
+};
+
+/**
+ * One half (RGB or Alpha) of a paired ALU instruction.
+ */
+struct rc_pair_sub_instruction {
+       unsigned int Opcode:8; /**< Opcode, looked up with rc_get_opcode_info(). */
+       unsigned int DestIndex:RC_REGISTER_INDEX_BITS; /**< Destination temporary index. */
+       unsigned int WriteMask:4; /**< Channels written to the temporary. */
+       unsigned int Target:2; /**< Color output index used with OutputWriteMask. */
+       unsigned int OutputWriteMask:3; /**< Channels written to the color output. */
+       unsigned int DepthWriteMask:1; /**< Non-zero when depth is written. */
+       unsigned int Saturate:1; /**< Non-zero when the result is saturated. */
+
+       /** Source slots; slot RC_PAIR_PRESUB_SRC holds the presubtract source. */
+       struct rc_pair_instruction_source Src[4];
+       /** Arguments; the opcode's NumSrcRegs tells how many are meaningful. */
+       struct rc_pair_instruction_arg Arg[3];
+};
+
+/**
+ * A paired ALU instruction: an RGB sub-instruction and an Alpha
+ * sub-instruction issued together.
+ */
+struct rc_pair_instruction {
+       struct rc_pair_sub_instruction RGB; /**< The RGB half. */
+       struct rc_pair_sub_instruction Alpha; /**< The alpha half. */
+
+       /** Zero for none; otherwise compared against RC_ALURESULT_X (RGB half)
+        * or RC_ALURESULT_W (alpha half) to tell which half writes aluresult. */
+       unsigned int WriteALUResult:2;
+       /** Comparison against zero applied to the ALU result
+        * (printed as an rc_compare_func). */
+       unsigned int ALUResultCompare:3;
+       /* NOTE(review): presumably marks the whole instruction as a no-op;
+        * no use is visible in this part of the file -- confirm. */
+       unsigned int Nop:1;
+};
+
+/**
+ * Callback type taken by the rc_pair_foreach_source_that_*_reads()
+ * helpers.  It receives an opaque pointer and one source slot.
+ */
+typedef void (*rc_pair_foreach_src_fn)
+                       (void *, struct rc_pair_instruction_source *);
+
+/**
+ * General helper functions for dealing with the paired instruction format.
+ */
+/*@{*/
+int rc_pair_alloc_source(struct rc_pair_instruction *pair,
+       unsigned int rgb, unsigned int alpha,
+       rc_register_file file, unsigned int index);
+
+void rc_pair_foreach_source_that_alpha_reads(
+       struct rc_pair_instruction * pair,
+       void * data,
+       rc_pair_foreach_src_fn cb);
+
+void rc_pair_foreach_source_that_rgb_reads(
+       struct rc_pair_instruction * pair,
+       void * data,
+       rc_pair_foreach_src_fn cb);
+
+struct rc_pair_instruction_source * rc_pair_get_src(
+       struct rc_pair_instruction * pair_inst,
+       struct rc_pair_instruction_arg * arg);
+
+int rc_pair_get_src_index(
+       struct rc_pair_instruction * pair_inst,
+       struct rc_pair_instruction_source * src);
+/*@}*/
+
+
+/**
+ * Compiler passes that operate with the paired format.
+ */
+/*@{*/
+struct radeon_pair_handler;
+
+void rc_pair_translate(struct radeon_compiler *cc, void *user);
+void rc_pair_schedule(struct radeon_compiler *cc, void *user);
+void rc_pair_regalloc(struct radeon_compiler *cc, void *user);
+void rc_pair_regalloc_inputs_only(struct radeon_compiler *cc, void *user);
+void rc_pair_remove_dead_sources(struct radeon_compiler *c, void *user);
+/*@}*/
+
+#endif /* __RADEON_PROGRAM_PAIR_H_ */
diff --git a/src/gallium/drivers/r300/compiler/radeon_program_print.c b/src/gallium/drivers/r300/compiler/radeon_program_print.c
new file mode 100644 (file)
index 0000000..390d131
--- /dev/null
@@ -0,0 +1,418 @@
+/*
+ * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "radeon_program.h"
+
+#include <stdio.h>
+
+/**
+ * Map a texture target to the name used in program dumps.
+ * Unrecognized targets yield "BAD_TEXTURE_TARGET".
+ */
+static const char * textarget_to_string(rc_texture_target target)
+{
+       const char * name = "BAD_TEXTURE_TARGET";
+
+       switch(target) {
+       case RC_TEXTURE_1D:
+               name = "1D";
+               break;
+       case RC_TEXTURE_2D:
+               name = "2D";
+               break;
+       case RC_TEXTURE_RECT:
+               name = "RECT";
+               break;
+       case RC_TEXTURE_3D:
+               name = "3D";
+               break;
+       case RC_TEXTURE_CUBE:
+               name = "CUBE";
+               break;
+       case RC_TEXTURE_1D_ARRAY:
+               name = "1D_ARRAY";
+               break;
+       case RC_TEXTURE_2D_ARRAY:
+               name = "2D_ARRAY";
+               break;
+       default:
+               break;
+       }
+       return name;
+}
+
+/**
+ * Describe a presubtract operation as the formula it computes.
+ * Unrecognized operations yield "BAD_PRESUBTRACT_OP".
+ */
+static const char * presubtract_op_to_string(rc_presubtract_op op)
+{
+       const char * formula = "BAD_PRESUBTRACT_OP";
+
+       switch(op) {
+       case RC_PRESUB_NONE: formula = "NONE"; break;
+       case RC_PRESUB_BIAS: formula = "(1 - 2 * src0)"; break;
+       case RC_PRESUB_SUB:  formula = "(src1 - src0)"; break;
+       case RC_PRESUB_ADD:  formula = "(src1 + src0)"; break;
+       case RC_PRESUB_INV:  formula = "(1 - src0)"; break;
+       default: break;
+       }
+       return formula;
+}
+
+/**
+ * Print the comparison "lhs <op> rhs" for \p func to \p f.
+ *
+ * NEVER and ALWAYS are printed as the constants "false" and "true";
+ * an unrecognized function is printed with the operator "???".
+ */
+static void rc_print_comparefunc(FILE * f, const char * lhs, rc_compare_func func, const char * rhs)
+{
+       const char * relation = "???";
+
+       switch(func) {
+       case RC_COMPARE_FUNC_NEVER:
+               fputs("false", f);
+               return;
+       case RC_COMPARE_FUNC_ALWAYS:
+               fputs("true", f);
+               return;
+       case RC_COMPARE_FUNC_LESS:
+               relation = "<";
+               break;
+       case RC_COMPARE_FUNC_EQUAL:
+               relation = "==";
+               break;
+       case RC_COMPARE_FUNC_LEQUAL:
+               relation = "<=";
+               break;
+       case RC_COMPARE_FUNC_GREATER:
+               relation = ">";
+               break;
+       case RC_COMPARE_FUNC_NOTEQUAL:
+               relation = "!=";
+               break;
+       case RC_COMPARE_FUNC_GEQUAL:
+               relation = ">=";
+               break;
+       default:
+               break;
+       }
+
+       fprintf(f, "%s %s %s", lhs, relation, rhs);
+}
+
+/**
+ * Print a register reference such as "temp[3]" or "const[1 + addr[0]]".
+ *
+ * \param f       stream to print to
+ * \param file    register file; RC_FILE_NONE prints "none", RC_FILE_SPECIAL
+ *                prints "aluresult" or "special[index]"
+ * \param index   register index within the file
+ * \param reladdr non-zero to append " + addr[0]" to the index
+ */
+static void rc_print_register(FILE * f, rc_register_file file, int index, unsigned int reladdr)
+{
+       const char * name;
+
+       if (file == RC_FILE_NONE) {
+               fprintf(f, "none");
+               return;
+       }
+
+       if (file == RC_FILE_SPECIAL) {
+               if (index == RC_SPECIAL_ALU_RESULT)
+                       fprintf(f, "aluresult");
+               else
+                       fprintf(f, "special[%i]", index);
+               return;
+       }
+
+       switch(file) {
+       case RC_FILE_TEMPORARY:
+               name = "temp";
+               break;
+       case RC_FILE_INPUT:
+               name = "input";
+               break;
+       case RC_FILE_OUTPUT:
+               name = "output";
+               break;
+       case RC_FILE_ADDRESS:
+               name = "addr";
+               break;
+       case RC_FILE_CONSTANT:
+               name = "const";
+               break;
+       default:
+               name = "BAD FILE";
+               break;
+       }
+
+       fprintf(f, "%s[%i%s]", name, index, reladdr ? " + addr[0]" : "");
+}
+
+/**
+ * Print the letters of the channels set in \p mask, in x, y, z, w order.
+ */
+static void rc_print_mask(FILE * f, unsigned int mask)
+{
+       static const struct {
+               unsigned int bit;
+               char letter;
+       } channels[] = {
+               { RC_MASK_X, 'x' },
+               { RC_MASK_Y, 'y' },
+               { RC_MASK_Z, 'z' },
+               { RC_MASK_W, 'w' },
+       };
+
+       for (unsigned int i = 0; i < sizeof(channels) / sizeof(channels[0]); i++) {
+               if (mask & channels[i].bit)
+                       fprintf(f, "%c", channels[i].letter);
+       }
+}
+
+/**
+ * Print a destination register, followed by ".<mask>" unless all four
+ * channels are written.
+ */
+static void rc_print_dst_register(FILE * f, struct rc_dst_register dst)
+{
+       rc_print_register(f, dst.File, dst.Index, 0);
+
+       /* A full write mask is implied and not printed. */
+       if (dst.WriteMask == RC_MASK_XYZW)
+               return;
+
+       fprintf(f, ".");
+       rc_print_mask(f, dst.WriteMask);
+}
+
+/**
+ * Return the one-character dump representation of a single swizzle
+ * selector.  An unknown selector is reported on stderr and yields '?'.
+ */
+static char rc_swizzle_char(unsigned int swz)
+{
+       char symbol;
+
+       switch(swz) {
+       case RC_SWIZZLE_X:
+               symbol = 'x';
+               break;
+       case RC_SWIZZLE_Y:
+               symbol = 'y';
+               break;
+       case RC_SWIZZLE_Z:
+               symbol = 'z';
+               break;
+       case RC_SWIZZLE_W:
+               symbol = 'w';
+               break;
+       case RC_SWIZZLE_ZERO:
+               symbol = '0';
+               break;
+       case RC_SWIZZLE_ONE:
+               symbol = '1';
+               break;
+       case RC_SWIZZLE_HALF:
+               symbol = 'H';
+               break;
+       case RC_SWIZZLE_UNUSED:
+               symbol = '_';
+               break;
+       default:
+               fprintf(stderr, "bad swz: %u\n", swz);
+               symbol = '?';
+               break;
+       }
+       return symbol;
+}
+
+/**
+ * Print a four-component swizzle.  Each component whose bit is set in
+ * \p negate is prefixed with '-'.
+ */
+static void rc_print_swizzle(FILE * f, unsigned int swizzle, unsigned int negate)
+{
+       for (unsigned int chan = 0; chan < 4; chan++) {
+               /* Emit the sign before looking up the selector character. */
+               if (GET_BIT(negate, chan))
+                       fputc('-', f);
+               fputc(rc_swizzle_char(GET_SWZ(swizzle, chan)), f);
+       }
+}
+
+/**
+ * Print a presubtract operation as a parenthesized expression over its
+ * source registers, e.g. "(1 - temp[0])" or "(temp[1] + temp[0])".
+ * An unrecognized opcode prints just "()".
+ */
+static void rc_print_presub_instruction(FILE * f,
+                                       struct rc_presub_instruction inst)
+{
+       /* Text printed before src0 for the one-operand forms. */
+       const char * prefix = NULL;
+       /* Text printed between src1 and src0 for the two-operand forms. */
+       const char * infix = NULL;
+
+       switch(inst.Opcode){
+       case RC_PRESUB_BIAS:
+               prefix = "1 - 2 * ";
+               break;
+       case RC_PRESUB_INV:
+               prefix = "1 - ";
+               break;
+       case RC_PRESUB_SUB:
+               infix = " - ";
+               break;
+       case RC_PRESUB_ADD:
+               infix = " + ";
+               break;
+       default:
+               break;
+       }
+
+       fprintf(f,"(");
+       if (prefix) {
+               fprintf(f, "%s", prefix);
+               rc_print_register(f, inst.SrcReg[0].File,
+                               inst.SrcReg[0].Index,inst.SrcReg[0].RelAddr);
+       } else if (infix) {
+               rc_print_register(f, inst.SrcReg[1].File,
+                               inst.SrcReg[1].Index,inst.SrcReg[1].RelAddr);
+               fprintf(f, "%s", infix);
+               rc_print_register(f, inst.SrcReg[0].File,
+                               inst.SrcReg[0].Index,inst.SrcReg[0].RelAddr);
+       }
+       fprintf(f, ")");
+}
+
+/**
+ * Print a source operand of \p inst.
+ *
+ * A negate mask covering all four channels is printed as a leading '-';
+ * a partial negate mask is printed per channel inside the swizzle.
+ * Absolute value is shown with '|' bars.  The swizzle is omitted when it
+ * is the identity and the negate mask is all-or-nothing.
+ */
+static void rc_print_src_register(FILE * f, struct rc_instruction * inst,
+                                               struct rc_src_register src)
+{
+       const int negate_all = (src.Negate == RC_MASK_XYZW);
+       const int uniform_negate = negate_all || (src.Negate == RC_MASK_NONE);
+
+       if (negate_all)
+               fprintf(f, "-");
+       if (src.Abs)
+               fprintf(f, "|");
+
+       if (src.File != RC_FILE_PRESUB)
+               rc_print_register(f, src.File, src.Index, src.RelAddr);
+       else
+               rc_print_presub_instruction(f, inst->U.I.PreSub);
+
+       if (!uniform_negate) {
+               /* Per-channel signs: close the bars before the swizzle. */
+               if (src.Abs)
+                       fprintf(f, "|");
+               fprintf(f, ".");
+               rc_print_swizzle(f, src.Swizzle, src.Negate);
+               return;
+       }
+
+       if (src.Swizzle != RC_SWIZZLE_XYZW) {
+               fprintf(f, ".");
+               rc_print_swizzle(f, src.Swizzle, 0);
+       }
+
+       /* Uniform sign: the closing bar goes after the swizzle. */
+       if (src.Abs)
+               fprintf(f, "|");
+}
+
+/**
+ * Update the nesting counter for \p opcode and return the number of
+ * spaces to indent the line for that opcode (two per nesting level).
+ *
+ * IF/BGNLOOP are printed at the current depth and then deepen it;
+ * ENDIF/ENDLOOP first leave the level and are printed at the outer depth;
+ * ELSE is printed one level out without changing the counter.
+ */
+static unsigned update_branch_depth(rc_opcode opcode, unsigned *branch_depth)
+{
+       unsigned line_depth;
+
+       switch (opcode) {
+       case RC_OPCODE_IF:
+       case RC_OPCODE_BGNLOOP:
+               line_depth = *branch_depth;
+               *branch_depth += 1;
+               break;
+
+       case RC_OPCODE_ENDIF:
+       case RC_OPCODE_ENDLOOP:
+               assert(*branch_depth > 0);
+               *branch_depth -= 1;
+               line_depth = *branch_depth;
+               break;
+
+       case RC_OPCODE_ELSE:
+               assert(*branch_depth > 0);
+               line_depth = *branch_depth - 1;
+               break;
+
+       default:
+               line_depth = *branch_depth;
+               break;
+       }
+
+       return line_depth * 2;
+}
+
+/**
+ * Print one non-paired instruction on a single line:
+ * indentation, opcode name with saturate suffix, destination, sources,
+ * optional texture target/unit, and an optional aluresult annotation.
+ *
+ * \param f            stream to print to
+ * \param inst         instruction to print (its U.I member is read)
+ * \param branch_depth running nesting counter, updated for this opcode
+ */
+static void rc_print_normal_instruction(FILE * f, struct rc_instruction * inst, unsigned *branch_depth)
+{
+       const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+       unsigned indent = update_branch_depth(inst->U.I.Opcode, branch_depth);
+       const char * sat_suffix;
+
+       for (; indent > 0; indent--)
+               fprintf(f, " ");
+
+       switch(inst->U.I.SaturateMode) {
+       case RC_SATURATE_NONE:
+               sat_suffix = "";
+               break;
+       case RC_SATURATE_ZERO_ONE:
+               sat_suffix = "_SAT";
+               break;
+       case RC_SATURATE_MINUS_PLUS_ONE:
+               sat_suffix = "_SAT2";
+               break;
+       default:
+               sat_suffix = "_BAD_SAT";
+               break;
+       }
+       fprintf(f, "%s%s", opcode->Name, sat_suffix);
+
+       if (opcode->HasDstReg) {
+               fprintf(f, " ");
+               rc_print_dst_register(f, inst->U.I.DstReg);
+       }
+
+       for (unsigned int reg = 0; reg < opcode->NumSrcRegs; ++reg) {
+               /* Separate from the destination or the previous source. */
+               if (reg > 0 || opcode->HasDstReg)
+                       fprintf(f, ",");
+               fprintf(f, " ");
+               rc_print_src_register(f, inst, inst->U.I.SrcReg[reg]);
+       }
+
+       if (opcode->HasTexture) {
+               const char * shadow = inst->U.I.TexShadow ? "SHADOW" : "";
+
+               fprintf(f, ", %s%s[%u]",
+                       textarget_to_string(inst->U.I.TexSrcTarget),
+                       shadow,
+                       inst->U.I.TexSrcUnit);
+       }
+
+       fprintf(f, ";");
+
+       if (inst->U.I.WriteALUResult) {
+               const char * chan =
+                       (inst->U.I.WriteALUResult == RC_ALURESULT_X) ? "x" : "w";
+
+               fprintf(f, " [aluresult = (");
+               rc_print_comparefunc(f, chan, inst->U.I.ALUResultCompare, "0");
+               fprintf(f, ")]");
+       }
+
+       fprintf(f, "\n");
+}
+
+/**
+ * Print one paired instruction as several lines:
+ *  - a line listing the source slots in use (and presubtract sources),
+ *  - a line for the RGB sub-instruction, unless its opcode is NOP,
+ *  - a line for the Alpha sub-instruction, unless its opcode is NOP,
+ *  - a line with the aluresult comparison, if WriteALUResult is set.
+ *
+ * \param f            stream to print to
+ * \param fullinst     instruction whose U.P member is printed
+ * \param branch_depth running nesting counter, updated for this instruction
+ */
+static void rc_print_pair_instruction(FILE * f, struct rc_instruction * fullinst, unsigned *branch_depth)
+{
+       struct rc_pair_instruction * inst = &fullinst->U.P;
+       int printedsrc = 0;
+       /* The indentation follows the RGB opcode, or the Alpha opcode when
+        * the RGB half is a NOP. */
+       unsigned spaces = update_branch_depth(inst->RGB.Opcode != RC_OPCODE_NOP ?
+                                             inst->RGB.Opcode : inst->Alpha.Opcode, branch_depth);
+
+       for (unsigned i = 0; i < spaces; i++)
+               fprintf(f, " ");
+
+       /* List the regular source slots (0..2) of both halves. */
+       /* NOTE(review): src is unsigned but is printed with %i below. */
+       for(unsigned int src = 0; src < 3; ++src) {
+               if (inst->RGB.Src[src].Used) {
+                       if (printedsrc)
+                               fprintf(f, ", ");
+                       fprintf(f, "src%i.xyz = ", src);
+                       rc_print_register(f, inst->RGB.Src[src].File, inst->RGB.Src[src].Index, 0);
+                       printedsrc = 1;
+               }
+               if (inst->Alpha.Src[src].Used) {
+                       if (printedsrc)
+                               fprintf(f, ", ");
+                       fprintf(f, "src%i.w = ", src);
+                       rc_print_register(f, inst->Alpha.Src[src].File, inst->Alpha.Src[src].Index, 0);
+                       printedsrc = 1;
+               }
+       }
+       /* The presubtract slot stores the presubtract operation in Index.
+        * The ", " separator is printed unconditionally here. */
+       if(inst->RGB.Src[RC_PAIR_PRESUB_SRC].Used) {
+               fprintf(f, ", srcp.xyz = %s",
+                       presubtract_op_to_string(
+                                       inst->RGB.Src[RC_PAIR_PRESUB_SRC].Index));
+       }
+       if(inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) {
+               fprintf(f, ", srcp.w = %s",
+                       presubtract_op_to_string(
+                                       inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Index));
+       }
+       fprintf(f, "\n");
+
+       /* RGB half: opcode, destinations, then one "srcN.xyz" per argument. */
+       if (inst->RGB.Opcode != RC_OPCODE_NOP) {
+               const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->RGB.Opcode);
+
+               for (unsigned i = 0; i < spaces; i++)
+                       fprintf(f, " ");
+
+               fprintf(f, "     %s%s", opcode->Name, inst->RGB.Saturate ? "_SAT" : "");
+               if (inst->RGB.WriteMask)
+                       fprintf(f, " temp[%i].%s%s%s", inst->RGB.DestIndex,
+                               (inst->RGB.WriteMask & 1) ? "x" : "",
+                               (inst->RGB.WriteMask & 2) ? "y" : "",
+                               (inst->RGB.WriteMask & 4) ? "z" : "");
+               if (inst->RGB.OutputWriteMask)
+                       fprintf(f, " color[%i].%s%s%s", inst->RGB.Target,
+                               (inst->RGB.OutputWriteMask & 1) ? "x" : "",
+                               (inst->RGB.OutputWriteMask & 2) ? "y" : "",
+                               (inst->RGB.OutputWriteMask & 4) ? "z" : "");
+               if (inst->WriteALUResult == RC_ALURESULT_X)
+                       fprintf(f, " aluresult");
+
+               for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) {
+                       const char* abs = inst->RGB.Arg[arg].Abs ? "|" : "";
+                       const char* neg = inst->RGB.Arg[arg].Negate ? "-" : "";
+                       fprintf(f, ", %s%ssrc", neg, abs);
+                       /* "srcp" names the presubtract result. */
+                       if(inst->RGB.Arg[arg].Source == RC_PAIR_PRESUB_SRC)
+                               fprintf(f,"p");
+                       else
+                               fprintf(f,"%d", inst->RGB.Arg[arg].Source);
+                       /* Only the first three swizzle components are shown. */
+                       fprintf(f,".%c%c%c%s",
+                               rc_swizzle_char(GET_SWZ(inst->RGB.Arg[arg].Swizzle, 0)),
+                               rc_swizzle_char(GET_SWZ(inst->RGB.Arg[arg].Swizzle, 1)),
+                               rc_swizzle_char(GET_SWZ(inst->RGB.Arg[arg].Swizzle, 2)),
+                               abs);
+               }
+               fprintf(f, "\n");
+       }
+
+       /* Alpha half: same layout, with a single swizzle component. */
+       if (inst->Alpha.Opcode != RC_OPCODE_NOP) {
+               const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Alpha.Opcode);
+
+               for (unsigned i = 0; i < spaces; i++)
+                       fprintf(f, " ");
+
+               fprintf(f, "     %s%s", opcode->Name, inst->Alpha.Saturate ? "_SAT" : "");
+               if (inst->Alpha.WriteMask)
+                       fprintf(f, " temp[%i].w", inst->Alpha.DestIndex);
+               if (inst->Alpha.OutputWriteMask)
+                       fprintf(f, " color[%i].w", inst->Alpha.Target);
+               if (inst->Alpha.DepthWriteMask)
+                       fprintf(f, " depth.w");
+               if (inst->WriteALUResult == RC_ALURESULT_W)
+                       fprintf(f, " aluresult");
+
+               for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) {
+                       const char* abs = inst->Alpha.Arg[arg].Abs ? "|" : "";
+                       const char* neg = inst->Alpha.Arg[arg].Negate ? "-" : "";
+                       fprintf(f, ", %s%ssrc", neg, abs);
+                       if(inst->Alpha.Arg[arg].Source == RC_PAIR_PRESUB_SRC)
+                               fprintf(f,"p");
+                       else
+                               fprintf(f,"%d", inst->Alpha.Arg[arg].Source);
+                       fprintf(f,".%c%s",
+                               rc_swizzle_char(GET_SWZ(inst->Alpha.Arg[arg].Swizzle, 0)), abs);
+               }
+               fprintf(f, "\n");
+       }
+
+       /* Trailing line describing how the ALU result is compared with 0. */
+       if (inst->WriteALUResult) {
+               for (unsigned i = 0; i < spaces; i++)
+                       fprintf(f, " ");
+
+               fprintf(f, "      [aluresult = (");
+               rc_print_comparefunc(f, "result", inst->ALUResultCompare, "0");
+               fprintf(f, ")]\n");
+       }
+}
+
+/**
+ * Print program to stderr, default options.
+ *
+ * Every instruction is prefixed with its zero-based position in the
+ * instruction list.  Pair instructions and normal instructions use their
+ * own printers; one nesting counter is shared across the whole listing
+ * and passed to both printers for indentation.
+ */
+void rc_print_program(const struct rc_program *prog)
+{
+       unsigned int linenum = 0;
+       unsigned branch_depth = 0;
+       struct rc_instruction *inst;
+
+       fprintf(stderr, "# Radeon Compiler Program\n");
+
+       for(inst = prog->Instructions.Next; inst != &prog->Instructions; inst = inst->Next) {
+               /* linenum is unsigned, so use the matching conversion. */
+               fprintf(stderr, "%3u: ", linenum);
+
+               if (inst->Type == RC_INSTRUCTION_PAIR)
+                       rc_print_pair_instruction(stderr, inst, &branch_depth);
+               else
+                       rc_print_normal_instruction(stderr, inst, &branch_depth);
+
+               linenum++;
+       }
+}
diff --git a/src/gallium/drivers/r300/compiler/radeon_program_tex.c b/src/gallium/drivers/r300/compiler/radeon_program_tex.c
new file mode 100644 (file)
index 0000000..8d16b2c
--- /dev/null
@@ -0,0 +1,528 @@
+/*
+ * Copyright (C) 2010 Corbin Simpson
+ * Copyright (C) 2010 Marek Olšák <maraeo@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_program_tex.h"
+
+#include "radeon_compiler_util.h"
+
+/* Series of transformations to be done on textures. */
+
+/**
+ * Build the source operand that stands for a failed shadow comparison on
+ * texture unit \p tmu.
+ *
+ * With enable_shadow_ambient set, the value is the W component of the
+ * RC_STATE_SHADOW_AMBIENT state constant for the unit (the constant is
+ * added to the program's constant list); otherwise it is constant zero.
+ * In both cases the unit's texture swizzle is applied on top.
+ */
+static struct rc_src_register shadow_fail_value(struct r300_fragment_program_compiler *compiler,
+                                               int tmu)
+{
+       struct rc_src_register reg = { 0, };
+
+       if (!compiler->enable_shadow_ambient) {
+               reg.File = RC_FILE_NONE;
+               reg.Swizzle = RC_SWIZZLE_0000;
+       } else {
+               reg.File = RC_FILE_CONSTANT;
+               reg.Index = rc_constants_add_state(&compiler->Base.Program.Constants,
+                                                  RC_STATE_SHADOW_AMBIENT, tmu);
+               reg.Swizzle = RC_SWIZZLE_WWWW;
+       }
+
+       /* Route the chosen value through the sampler's swizzle. */
+       reg.Swizzle = combine_swizzles(reg.Swizzle,
+                               compiler->state.unit[tmu].texture_swizzle);
+
+       return reg;
+}
+
+/**
+ * Build the source operand that stands for a passed shadow comparison on
+ * texture unit \p tmu: constant one, with the unit's texture swizzle
+ * applied on top.
+ */
+static struct rc_src_register shadow_pass_value(struct r300_fragment_program_compiler *compiler,
+                                               int tmu)
+{
+       struct rc_src_register reg = { 0, };
+
+       /* No register file is read; the value comes from the swizzle alone. */
+       reg.Swizzle = combine_swizzles(RC_SWIZZLE_1111,
+                               compiler->state.unit[tmu].texture_swizzle);
+       reg.File = RC_FILE_NONE;
+
+       return reg;
+}
+
+/**
+ * Multiply the texture coordinate of \p inst by a per-unit state constant.
+ *
+ * A MUL writing a fresh temporary is inserted before \p inst; it multiplies
+ * the original coordinate operand by the \p state_constant constant of the
+ * instruction's texture unit.  \p inst is then changed to read its
+ * coordinate from that temporary.
+ */
+static void scale_texcoords(struct r300_fragment_program_compiler *compiler,
+                           struct rc_instruction *inst,
+                           unsigned state_constant)
+{
+       unsigned scaled = rc_find_free_temporary(&compiler->Base);
+       struct rc_instruction *inst_mul =
+               rc_insert_new_instruction(&compiler->Base, inst->Prev);
+
+       /* scaled = coord * state_constant[unit] */
+       inst_mul->U.I.Opcode = RC_OPCODE_MUL;
+       inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY;
+       inst_mul->U.I.DstReg.Index = scaled;
+       inst_mul->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+       inst_mul->U.I.SrcReg[1].File = RC_FILE_CONSTANT;
+       inst_mul->U.I.SrcReg[1].Index =
+                       rc_constants_add_state(&compiler->Base.Program.Constants,
+                                              state_constant, inst->U.I.TexSrcUnit);
+
+       /* Point the texture instruction at the scaled coordinate. */
+       reset_srcreg(&inst->U.I.SrcReg[0]);
+       inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+       inst->U.I.SrcReg[0].Index = scaled;
+}
+
+/**
+ * Replace a projective texture lookup by an explicit divide plus TEX.
+ *
+ * Two instructions are inserted before \p inst: an RCP that computes the
+ * reciprocal of the coordinate component mapped to W into temp.w, and a
+ * MUL that multiplies the original coordinate by temp.wwww.  \p inst is
+ * then turned into a plain TEX reading the temporary.
+ */
+static void projective_divide(struct r300_fragment_program_compiler *compiler,
+                             struct rc_instruction *inst)
+{
+       /* Original coordinate operand, saved before inst is rewritten. */
+       const struct rc_src_register coord = inst->U.I.SrcReg[0];
+       struct rc_instruction *rcp;
+       struct rc_instruction *mul;
+       unsigned divided = rc_find_free_temporary(&compiler->Base);
+
+       /* divided.w = 1 / coord.<component mapped to W> */
+       rcp = rc_insert_new_instruction(&compiler->Base, inst->Prev);
+       rcp->U.I.Opcode = RC_OPCODE_RCP;
+       rcp->U.I.DstReg.File = RC_FILE_TEMPORARY;
+       rcp->U.I.DstReg.Index = divided;
+       rcp->U.I.DstReg.WriteMask = RC_MASK_W;
+       rcp->U.I.SrcReg[0] = coord;
+       /* Because the input can be arbitrarily swizzled,
+        * read the component mapped to W. */
+       rcp->U.I.SrcReg[0].Swizzle =
+               RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(coord.Swizzle, 3));
+
+       /* divided = coord * divided.wwww */
+       mul = rc_insert_new_instruction(&compiler->Base, inst->Prev);
+       mul->U.I.Opcode = RC_OPCODE_MUL;
+       mul->U.I.DstReg.File = RC_FILE_TEMPORARY;
+       mul->U.I.DstReg.Index = divided;
+       mul->U.I.SrcReg[0] = coord;
+       mul->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
+       mul->U.I.SrcReg[1].Index = divided;
+       mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW;
+
+       /* The lookup itself becomes a non-projective TEX of the temporary. */
+       reset_srcreg(&inst->U.I.SrcReg[0]);
+       inst->U.I.Opcode = RC_OPCODE_TEX;
+       inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+       inst->U.I.SrcReg[0].Index = divided;
+}
+
+/**
+ * Transform TEX, TXP, TXB, and KIL instructions in the following ways:
+ *  - implement texture compare (shadow extensions)
+ *  - extract non-native source / destination operands
+ *  - premultiply texture coordinates for RECT
+ *  - extract operand swizzles
+ *  - introduce a temporary register when write masks are needed
+ */
+int radeonTransformTEX(
+       struct radeon_compiler * c,
+       struct rc_instruction * inst,
+       void* data)
+{
+       struct r300_fragment_program_compiler *compiler =
+               (struct r300_fragment_program_compiler*)data;
+       rc_wrap_mode wrapmode = compiler->state.unit[inst->U.I.TexSrcUnit].wrap_mode;
+       int is_rect = inst->U.I.TexSrcTarget == RC_TEXTURE_RECT ||
+                     compiler->state.unit[inst->U.I.TexSrcUnit].non_normalized_coords;
+
+       if (inst->U.I.Opcode != RC_OPCODE_TEX &&
+               inst->U.I.Opcode != RC_OPCODE_TXB &&
+               inst->U.I.Opcode != RC_OPCODE_TXP &&
+               inst->U.I.Opcode != RC_OPCODE_TXD &&
+               inst->U.I.Opcode != RC_OPCODE_TXL &&
+               inst->U.I.Opcode != RC_OPCODE_KIL)
+               return 0;
+
+       /* ARB_shadow & EXT_shadow_funcs */
+       if (inst->U.I.Opcode != RC_OPCODE_KIL &&
+               ((c->Program.ShadowSamplers & (1 << inst->U.I.TexSrcUnit)) ||
+                (compiler->state.unit[inst->U.I.TexSrcUnit].compare_mode_enabled))) {
+               rc_compare_func comparefunc = compiler->state.unit[inst->U.I.TexSrcUnit].texture_compare_func;
+
+               if (comparefunc == RC_COMPARE_FUNC_NEVER || comparefunc == RC_COMPARE_FUNC_ALWAYS) {
+                       inst->U.I.Opcode = RC_OPCODE_MOV;
+
+                       if (comparefunc == RC_COMPARE_FUNC_ALWAYS) {
+                               inst->U.I.SrcReg[0] = shadow_pass_value(compiler, inst->U.I.TexSrcUnit);
+                       } else {
+                               inst->U.I.SrcReg[0] = shadow_fail_value(compiler, inst->U.I.TexSrcUnit);
+                       }
+
+                       return 1;
+               } else {
+                       struct rc_instruction * inst_rcp = NULL;
+                       struct rc_instruction *inst_mul, *inst_add, *inst_cmp;
+                       unsigned tmp_texsample;
+                       unsigned tmp_sum;
+                       int pass, fail;
+
+                       /* Save the output register. */
+                       struct rc_dst_register output_reg = inst->U.I.DstReg;
+                       unsigned saturate_mode = inst->U.I.SaturateMode;
+
+                       /* Redirect TEX to a new temp. */
+                       tmp_texsample = rc_find_free_temporary(c);
+                       inst->U.I.SaturateMode = 0;
+                       inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
+                       inst->U.I.DstReg.Index = tmp_texsample;
+                       inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
+
+                       tmp_sum = rc_find_free_temporary(c);
+
+                       if (inst->U.I.Opcode == RC_OPCODE_TXP) {
+                               /* Compute 1/W. */
+                               inst_rcp = rc_insert_new_instruction(c, inst);
+                               inst_rcp->U.I.Opcode = RC_OPCODE_RCP;
+                               inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY;
+                               inst_rcp->U.I.DstReg.Index = tmp_sum;
+                               inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W;
+                               inst_rcp->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+                               inst_rcp->U.I.SrcReg[0].Swizzle =
+                                       RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(inst->U.I.SrcReg[0].Swizzle, 3));
+                       }
+
+                       /* Divide Z by W (if it's TXP) and saturate. */
+                       inst_mul = rc_insert_new_instruction(c, inst_rcp ? inst_rcp : inst);
+                       inst_mul->U.I.Opcode = inst->U.I.Opcode == RC_OPCODE_TXP ? RC_OPCODE_MUL : RC_OPCODE_MOV;
+                       inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY;
+                       inst_mul->U.I.DstReg.Index = tmp_sum;
+                       inst_mul->U.I.DstReg.WriteMask = RC_MASK_W;
+                       inst_mul->U.I.SaturateMode = RC_SATURATE_ZERO_ONE;
+                       inst_mul->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+                       inst_mul->U.I.SrcReg[0].Swizzle =
+                               RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(inst->U.I.SrcReg[0].Swizzle, 2));
+                       if (inst->U.I.Opcode == RC_OPCODE_TXP) {
+                               inst_mul->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
+                               inst_mul->U.I.SrcReg[1].Index = tmp_sum;
+                               inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW;
+                       }
+
+                       /* Add the depth texture value. */
+                       inst_add = rc_insert_new_instruction(c, inst_mul);
+                       inst_add->U.I.Opcode = RC_OPCODE_ADD;
+                       inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY;
+                       inst_add->U.I.DstReg.Index = tmp_sum;
+                       inst_add->U.I.DstReg.WriteMask = RC_MASK_W;
+                       inst_add->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+                       inst_add->U.I.SrcReg[0].Index = tmp_sum;
+                       inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW;
+                       inst_add->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
+                       inst_add->U.I.SrcReg[1].Index = tmp_texsample;
+                       inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XXXX;
+
+                       /* Note that SrcReg[0] is r, SrcReg[1] is tex and:
+                        *   LESS:    r  < tex  <=>      -tex+r < 0
+                        *   GEQUAL:  r >= tex  <=> not (-tex+r < 0)
+                        *   GREATER: r  > tex  <=>       tex-r < 0
+                        *   LEQUAL:  r <= tex  <=> not ( tex-r < 0)
+                        *   EQUAL:   GEQUAL
+                        *   NOTEQUAL:LESS
+                        */
+
+                       /* This negates either r or tex: */
+                       if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GEQUAL ||
+                           comparefunc == RC_COMPARE_FUNC_EQUAL || comparefunc == RC_COMPARE_FUNC_NOTEQUAL)
+                               inst_add->U.I.SrcReg[1].Negate = inst_add->U.I.SrcReg[1].Negate ^ RC_MASK_XYZW;
+                       else
+                               inst_add->U.I.SrcReg[0].Negate = inst_add->U.I.SrcReg[0].Negate ^ RC_MASK_XYZW;
+
+                       /* This negates the whole expression: */
+                       if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GREATER ||
+                           comparefunc == RC_COMPARE_FUNC_NOTEQUAL) {
+                               pass = 1;
+                               fail = 2;
+                       } else {
+                               pass = 2;
+                               fail = 1;
+                       }
+
+                       inst_cmp = rc_insert_new_instruction(c, inst_add);
+                       inst_cmp->U.I.Opcode = RC_OPCODE_CMP;
+                       inst_cmp->U.I.SaturateMode = saturate_mode;
+                       inst_cmp->U.I.DstReg = output_reg;
+                       inst_cmp->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+                       inst_cmp->U.I.SrcReg[0].Index = tmp_sum;
+                       inst_cmp->U.I.SrcReg[0].Swizzle =
+                                       combine_swizzles(RC_SWIZZLE_WWWW,
+                                                        compiler->state.unit[inst->U.I.TexSrcUnit].texture_swizzle);
+                       inst_cmp->U.I.SrcReg[pass] = shadow_pass_value(compiler, inst->U.I.TexSrcUnit);
+                       inst_cmp->U.I.SrcReg[fail] = shadow_fail_value(compiler, inst->U.I.TexSrcUnit);
+
+                       assert(tmp_texsample != tmp_sum);
+               }
+       }
+
+       /* R300 cannot sample from rectangles and the wrap mode fallback needs
+        * normalized coordinates anyway. */
+       if (inst->U.I.Opcode != RC_OPCODE_KIL &&
+           is_rect && (!c->is_r500 || wrapmode != RC_WRAP_NONE)) {
+               scale_texcoords(compiler, inst, RC_STATE_R300_TEXRECT_FACTOR);
+               inst->U.I.TexSrcTarget = RC_TEXTURE_2D;
+       }
+
+       /* Divide by W if needed. */
+       if (inst->U.I.Opcode == RC_OPCODE_TXP &&
+           (wrapmode == RC_WRAP_REPEAT || wrapmode == RC_WRAP_MIRRORED_REPEAT ||
+            compiler->state.unit[inst->U.I.TexSrcUnit].clamp_and_scale_before_fetch)) {
+               projective_divide(compiler, inst);
+       }
+
+       /* Texture wrap modes don't work on NPOT textures.
+        *
+        * Non-wrapped/clamped texcoords with NPOT are free in HW. Repeat and
+        * mirroring are not. If we need to repeat, we do:
+        *
+        * MUL temp, texcoord, <scaling factor constant>
+        * FRC temp, temp ; Discard integer portion of coords
+        *
+        * This gives us coords in [0, 1].
+        *
+        * Mirroring is trickier. We're going to start out like repeat:
+        *
+        * MUL temp, texcoord, <scaling factor constant> ; De-mirror across axes
+        * MUL temp, temp, 0.5 ; Pattern repeats in [0, 2]
+        *                            ; so scale to [0, 1]
+        * FRC temp, temp ; Make the pattern repeat
+        * MAD temp, temp, 2, -1 ; Move the pattern to [-1, 1]
+        * ADD temp, 1, -abs(temp) ; Now comes a neat trick: use abs to mirror the pattern.
+        *                              ; The pattern is backwards, so reverse it (1-x).
+        *
+        * This gives us coords in [0, 1].
+        *
+        * ~ C & M. ;)
+        */
+       if (inst->U.I.Opcode != RC_OPCODE_KIL &&
+           wrapmode != RC_WRAP_NONE) {
+               struct rc_instruction *inst_mov;
+               unsigned temp = rc_find_free_temporary(c);
+
+               if (wrapmode == RC_WRAP_REPEAT) {
+                       /* Both instructions will be paired up. */
+                       struct rc_instruction *inst_frc = rc_insert_new_instruction(c, inst->Prev);
+
+                       inst_frc->U.I.Opcode = RC_OPCODE_FRC;
+                       inst_frc->U.I.DstReg.File = RC_FILE_TEMPORARY;
+                       inst_frc->U.I.DstReg.Index = temp;
+                       inst_frc->U.I.DstReg.WriteMask = RC_MASK_XYZ;
+                       inst_frc->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+               } else if (wrapmode == RC_WRAP_MIRRORED_REPEAT) {
+                       /*
+                        * Function:
+                        *   f(v) = 1 - abs(frac(v * 0.5) * 2 - 1)
+                        *
+                        * Code:
+                        *   MUL temp, src0, 0.5
+                        *   FRC temp, temp
+                        *   MAD temp, temp, 2, -1
+                        *   ADD temp, 1, -abs(temp)
+                        */
+
+                       struct rc_instruction *inst_mul, *inst_frc, *inst_mad, *inst_add;
+                       unsigned two, two_swizzle;
+
+                       inst_mul = rc_insert_new_instruction(c, inst->Prev);
+
+                       inst_mul->U.I.Opcode = RC_OPCODE_MUL;
+                       inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY;
+                       inst_mul->U.I.DstReg.Index = temp;
+                       inst_mul->U.I.DstReg.WriteMask = RC_MASK_XYZ;
+                       inst_mul->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+                       inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_HHHH;
+
+                       inst_frc = rc_insert_new_instruction(c, inst->Prev);
+
+                       inst_frc->U.I.Opcode = RC_OPCODE_FRC;
+                       inst_frc->U.I.DstReg.File = RC_FILE_TEMPORARY;
+                       inst_frc->U.I.DstReg.Index = temp;
+                       inst_frc->U.I.DstReg.WriteMask = RC_MASK_XYZ;
+                       inst_frc->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+                       inst_frc->U.I.SrcReg[0].Index = temp;
+                       inst_frc->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0;
+
+                       two = rc_constants_add_immediate_scalar(&c->Program.Constants, 2, &two_swizzle);
+                       inst_mad = rc_insert_new_instruction(c, inst->Prev);
+
+                       inst_mad->U.I.Opcode = RC_OPCODE_MAD;
+                       inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY;
+                       inst_mad->U.I.DstReg.Index = temp;
+                       inst_mad->U.I.DstReg.WriteMask = RC_MASK_XYZ;
+                       inst_mad->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+                       inst_mad->U.I.SrcReg[0].Index = temp;
+                       inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0;
+                       inst_mad->U.I.SrcReg[1].File = RC_FILE_CONSTANT;
+                       inst_mad->U.I.SrcReg[1].Index = two;
+                       inst_mad->U.I.SrcReg[1].Swizzle = two_swizzle;
+                       inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_1111;
+                       inst_mad->U.I.SrcReg[2].Negate = RC_MASK_XYZ;
+
+                       inst_add = rc_insert_new_instruction(c, inst->Prev);
+
+                       inst_add->U.I.Opcode = RC_OPCODE_ADD;
+                       inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY;
+                       inst_add->U.I.DstReg.Index = temp;
+                       inst_add->U.I.DstReg.WriteMask = RC_MASK_XYZ;
+                       inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111;
+                       inst_add->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
+                       inst_add->U.I.SrcReg[1].Index = temp;
+                       inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZ0;
+                       inst_add->U.I.SrcReg[1].Abs = 1;
+                       inst_add->U.I.SrcReg[1].Negate = RC_MASK_XYZ;
+               } else if (wrapmode == RC_WRAP_MIRRORED_CLAMP) {
+                       /*
+                        * Mirrored clamp modes are bloody simple, we just use abs
+                        * to mirror [0, 1] into [-1, 0]. This works for
+                        * all modes i.e. CLAMP, CLAMP_TO_EDGE, and CLAMP_TO_BORDER.
+                        */
+                       struct rc_instruction *inst_mov;
+
+                       inst_mov = rc_insert_new_instruction(c, inst->Prev);
+
+                       inst_mov->U.I.Opcode = RC_OPCODE_MOV;
+                       inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
+                       inst_mov->U.I.DstReg.Index = temp;
+                       inst_mov->U.I.DstReg.WriteMask = RC_MASK_XYZ;
+                       inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+                       inst_mov->U.I.SrcReg[0].Abs = 1;
+               }
+
+               /* Preserve W for TXP/TXB. */
+               inst_mov = rc_insert_new_instruction(c, inst->Prev);
+
+               inst_mov->U.I.Opcode = RC_OPCODE_MOV;
+               inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
+               inst_mov->U.I.DstReg.Index = temp;
+               inst_mov->U.I.DstReg.WriteMask = RC_MASK_W;
+               inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+
+               reset_srcreg(&inst->U.I.SrcReg[0]);
+               inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+               inst->U.I.SrcReg[0].Index = temp;
+       }
+
+       /* NPOT -> POT conversion for 3D textures. */
+       if (inst->U.I.Opcode != RC_OPCODE_KIL &&
+           compiler->state.unit[inst->U.I.TexSrcUnit].clamp_and_scale_before_fetch) {
+               struct rc_instruction *inst_mov;
+               unsigned temp = rc_find_free_temporary(c);
+
+               /* Saturate XYZ. */
+               inst_mov = rc_insert_new_instruction(c, inst->Prev);
+               inst_mov->U.I.Opcode = RC_OPCODE_MOV;
+               inst_mov->U.I.SaturateMode = RC_SATURATE_ZERO_ONE;
+               inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
+               inst_mov->U.I.DstReg.Index = temp;
+               inst_mov->U.I.DstReg.WriteMask = RC_MASK_XYZ;
+               inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+
+               /* Copy W. */
+               inst_mov = rc_insert_new_instruction(c, inst->Prev);
+               inst_mov->U.I.Opcode = RC_OPCODE_MOV;
+               inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
+               inst_mov->U.I.DstReg.Index = temp;
+               inst_mov->U.I.DstReg.WriteMask = RC_MASK_W;
+               inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+
+               reset_srcreg(&inst->U.I.SrcReg[0]);
+               inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+               inst->U.I.SrcReg[0].Index = temp;
+
+               scale_texcoords(compiler, inst, RC_STATE_R300_TEXSCALE_FACTOR);
+       }
+
+       /* Convert SNORM-encoded ATI1N sampled as UNORM to SNORM.
+        * Formula: dst = tex > 0.5 ? tex*2-2 : tex*2
+        *
+        * The TEX result is redirected to a fresh temporary. Three ALU
+        * instructions are then appended after the TEX: MUL computes tex*2,
+        * MAD computes tex*2-2 and CND picks one of the two, following the
+        * formula above, and writes it to the original destination.
+        */
+       if (inst->U.I.Opcode != RC_OPCODE_KIL &&
+           compiler->state.unit[inst->U.I.TexSrcUnit].convert_unorm_to_snorm) {
+               unsigned two, two_swizzle;
+               struct rc_instruction *inst_mul, *inst_mad, *inst_cnd;
+
+               /* The formula needs exactly 2: it is the scale factor of both
+                * products and, negated, the bias of the MAD. */
+               two = rc_constants_add_immediate_scalar(&c->Program.Constants, 2, &two_swizzle);
+
+               /* tmp_mul = tex * 2 */
+               inst_mul = rc_insert_new_instruction(c, inst);
+               inst_mul->U.I.Opcode = RC_OPCODE_MUL;
+               inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY;
+               inst_mul->U.I.DstReg.Index = rc_find_free_temporary(c);
+               inst_mul->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+               inst_mul->U.I.SrcReg[0].Index = rc_find_free_temporary(c); /* redirected TEX output */
+               inst_mul->U.I.SrcReg[1].File = RC_FILE_CONSTANT; /* 2 */
+               inst_mul->U.I.SrcReg[1].Index = two;
+               inst_mul->U.I.SrcReg[1].Swizzle = two_swizzle;
+
+               /* tmp_mad = tex * 2 - 2 */
+               inst_mad = rc_insert_new_instruction(c, inst_mul);
+               inst_mad->U.I.Opcode = RC_OPCODE_MAD;
+               inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY;
+               inst_mad->U.I.DstReg.Index = rc_find_free_temporary(c);
+               inst_mad->U.I.SrcReg[0] = inst_mul->U.I.SrcReg[0]; /* redirected TEX output */
+               inst_mad->U.I.SrcReg[1] = inst_mul->U.I.SrcReg[1]; /* 2 */
+               inst_mad->U.I.SrcReg[2] = inst_mul->U.I.SrcReg[1]; /* 2 */
+               inst_mad->U.I.SrcReg[2].Negate = RC_MASK_XYZW;
+
+               /* The CND takes over the destination and saturate mode of the
+                * TEX; the texture swizzle is applied to both candidates. */
+               inst_cnd = rc_insert_new_instruction(c, inst_mad);
+               inst_cnd->U.I.Opcode = RC_OPCODE_CND;
+               inst_cnd->U.I.SaturateMode = inst->U.I.SaturateMode;
+               inst_cnd->U.I.DstReg = inst->U.I.DstReg;
+               inst_cnd->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+               inst_cnd->U.I.SrcReg[0].Index = inst_mad->U.I.DstReg.Index;
+               inst_cnd->U.I.SrcReg[0].Swizzle = compiler->state.unit[inst->U.I.TexSrcUnit].texture_swizzle;
+               inst_cnd->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
+               inst_cnd->U.I.SrcReg[1].Index = inst_mul->U.I.DstReg.Index;
+               inst_cnd->U.I.SrcReg[1].Swizzle = compiler->state.unit[inst->U.I.TexSrcUnit].texture_swizzle;
+               inst_cnd->U.I.SrcReg[2] = inst_mul->U.I.SrcReg[0]; /* redirected TEX output */
+
+               /* Make the TEX itself write the full, unsaturated texel into
+                * the temporary that the three ALU instructions read. */
+               inst->U.I.SaturateMode = 0;
+               inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
+               inst->U.I.DstReg.Index = inst_mul->U.I.SrcReg[0].Index;
+               inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
+       }
+
+       /* Cannot write texture to output registers or with saturate (all chips),
+        * or with masks (non-r500). */
+       if (inst->U.I.Opcode != RC_OPCODE_KIL &&
+               (inst->U.I.DstReg.File != RC_FILE_TEMPORARY ||
+                inst->U.I.SaturateMode ||
+                (!c->is_r500 && inst->U.I.DstReg.WriteMask != RC_MASK_XYZW))) {
+               struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst);
+
+               inst_mov->U.I.Opcode = RC_OPCODE_MOV;
+               inst_mov->U.I.SaturateMode = inst->U.I.SaturateMode;
+               inst_mov->U.I.DstReg = inst->U.I.DstReg;
+               inst_mov->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+               inst_mov->U.I.SrcReg[0].Index = rc_find_free_temporary(c);
+
+               inst->U.I.SaturateMode = 0;
+               inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
+               inst->U.I.DstReg.Index = inst_mov->U.I.SrcReg[0].Index;
+               inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
+       }
+
+       /* Cannot read texture coordinate from constants file */
+       if (inst->U.I.SrcReg[0].File != RC_FILE_TEMPORARY && inst->U.I.SrcReg[0].File != RC_FILE_INPUT) {
+               struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev);
+
+               inst_mov->U.I.Opcode = RC_OPCODE_MOV;
+               inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
+               inst_mov->U.I.DstReg.Index = rc_find_free_temporary(c);
+               inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+
+               reset_srcreg(&inst->U.I.SrcReg[0]);
+               inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+               inst->U.I.SrcReg[0].Index = inst_mov->U.I.DstReg.Index;
+       }
+
+       return 1;
+}
diff --git a/src/gallium/drivers/r300/compiler/radeon_program_tex.h b/src/gallium/drivers/r300/compiler/radeon_program_tex.h
new file mode 100644 (file)
index 0000000..a010505
--- /dev/null
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2010 Corbin Simpson
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef RADEON_PROGRAM_TEX_H
+#define RADEON_PROGRAM_TEX_H
+
+#include "radeon_compiler.h"
+#include "radeon_program.h"
+
+/**
+ * Texture instruction transformation.
+ *
+ * NOTE(review): the contract is not spelled out in this header. Presumably
+ * this rewrites \p inst in place and inserts extra instructions around it
+ * when it is a texture instruction -- confirm against the definition.
+ *
+ * \param c     compiler that owns the program containing \p inst
+ * \param inst  instruction to examine
+ * \param data  opaque pointer handed through to the implementation; its
+ *              expected type is not visible here (TODO confirm)
+ * \return an int status; its meaning is not visible from this header
+ */
+int radeonTransformTEX(
+       struct radeon_compiler * c,
+       struct rc_instruction * inst,
+       void* data);
+
+#endif /* RADEON_PROGRAM_TEX_H */
diff --git a/src/gallium/drivers/r300/compiler/radeon_remove_constants.c b/src/gallium/drivers/r300/compiler/radeon_remove_constants.c
new file mode 100644 (file)
index 0000000..7d76585
--- /dev/null
@@ -0,0 +1,150 @@
+/*
+ * Copyright (C) 2010 Marek Olšák <maraeo@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_remove_constants.h"
+#include "radeon_dataflow.h"
+
+/**
+ * Bookkeeping handed to the mark_used() callback as its userdata.
+ */
+struct mark_used_data {
+       /* One byte per constant index; mark_used() sets const_used[i] to 1
+        * when constant i is read without relative addressing. */
+       unsigned char * const_used;
+       /* Points at a flag that mark_used() sets to 1 when any constant is
+        * read with relative addressing. */
+       unsigned * has_rel_addr;
+};
+
+/**
+ * Register remapping callback: translate the index of a constant-file
+ * register through the old-index -> new-index table passed as userdata.
+ * Registers of every other file are left as they are.
+ */
+static void remap_regs(void * userdata, struct rc_instruction * inst,
+                       rc_register_file * pfile, unsigned int * pindex)
+{
+       unsigned *old_to_new;
+
+       if (*pfile != RC_FILE_CONSTANT) {
+               return;
+       }
+
+       old_to_new = userdata;
+       *pindex = old_to_new[*pindex];
+}
+
+/**
+ * Source-operand callback: note in the mark_used_data passed as userdata
+ * how one source operand uses the constant file. A relatively addressed
+ * read only raises the has_rel_addr flag; a direct read flags the constant
+ * at src->Index as used. Operands of other register files are ignored.
+ */
+static void mark_used(void * userdata, struct rc_instruction * inst,
+                                               struct rc_src_register * src)
+{
+       struct mark_used_data * marks = userdata;
+
+       if (src->File != RC_FILE_CONSTANT) {
+               return;
+       }
+
+       if (!src->RelAddr) {
+               marks->const_used[src->Index] = 1;
+               return;
+       }
+
+       *marks->has_rel_addr = 1;
+}
+
+/**
+ * Remove constants that no instruction reads and renumber the rest.
+ *
+ * Constants still in use are packed towards the start of
+ * c->Program.Constants (their relative order is kept), every constant
+ * register reference is redirected to its new index, and
+ * c->Program.Constants.Count is lowered to the number of survivors.
+ *
+ * External constants are never dropped when some instruction reads the
+ * constant file with relative addressing or when
+ * c->remove_unused_constants is not set.
+ *
+ * If the scratch tables cannot be allocated, the program is left
+ * untouched, an error is reported through rc_error() and the output
+ * pointer is set to NULL.
+ *
+ * \param c     compiler whose program is rewritten in place
+ * \param user  really an "unsigned **": on return it holds either NULL or,
+ *              if at least one external constant changed its index, a
+ *              malloc'ed table with table[new_index] == old_index. This
+ *              function does not free that table; the caller owns it.
+ */
+void rc_remove_unused_constants(struct radeon_compiler *c, void *user)
+{
+       unsigned **out_remap_table = (unsigned**)user;
+       const unsigned old_count = c->Program.Constants.Count;
+       struct rc_constant *constants = c->Program.Constants.Constants;
+       unsigned char *const_used;
+       unsigned *remap_table;
+       unsigned *inv_remap_table;
+       unsigned has_rel_addr = 0;
+       unsigned is_identity = 1;
+       unsigned are_externals_remapped = 0;
+       struct mark_used_data d;
+       unsigned new_count = 0;
+
+       if (!old_count) {
+               *out_remap_table = NULL;
+               return;
+       }
+
+       /* Allocate every scratch table before touching the program, so that
+        * an allocation failure cannot leave it half rewritten.
+        * const_used must start out all zero, hence calloc. */
+       const_used = calloc(old_count, sizeof(*const_used));
+       remap_table = malloc(old_count * sizeof(*remap_table));
+       inv_remap_table = malloc(old_count * sizeof(*inv_remap_table));
+       if (!const_used || !remap_table || !inv_remap_table) {
+               free(const_used);
+               free(remap_table);
+               free(inv_remap_table);
+               *out_remap_table = NULL;
+               rc_error(c, "Out of memory while removing unused constants\n");
+               return;
+       }
+
+       d.const_used = const_used;
+       d.has_rel_addr = &has_rel_addr;
+
+       /* Pass 1: Mark used constants. */
+       for (struct rc_instruction *inst = c->Program.Instructions.Next;
+            inst != &c->Program.Instructions; inst = inst->Next) {
+               rc_for_all_reads_src(inst, mark_used, &d);
+       }
+
+       /* Pass 2: If there is relative addressing or dead constant elimination
+        * is disabled, mark all externals as used. */
+       if (has_rel_addr || !c->remove_unused_constants) {
+               for (unsigned i = 0; i < old_count; i++) {
+                       if (constants[i].Type == RC_CONSTANT_EXTERNAL)
+                               const_used[i] = 1;
+               }
+       }
+
+       /* Pass 3: Make the remapping table and remap constants.
+        * This pass removes unused constants simply by overwriting them with
+        * the used constants that follow them.
+        *   remap_table[new]     == old index
+        *   inv_remap_table[old] == new index (only set for used constants) */
+       for (unsigned i = 0; i < old_count; i++) {
+               if (!const_used[i])
+                       continue;
+
+               remap_table[new_count] = i;
+               inv_remap_table[i] = new_count;
+
+               if (i != new_count) {
+                       if (constants[i].Type == RC_CONSTANT_EXTERNAL)
+                               are_externals_remapped = 1;
+
+                       constants[new_count] = constants[i];
+                       is_identity = 0;
+               }
+               new_count++;
+       }
+
+       /* !is_identity ==> new_count < old_count
+        * (The converse does not hold: unused constants at the very end of
+        * the array are dropped while the remapping stays the identity.) */
+       assert( is_identity || new_count <  old_count);
+       /* Externals that were all forced to "used" must not have moved. */
+       assert(!((has_rel_addr || !c->remove_unused_constants) && are_externals_remapped));
+
+       /* Pass 4: Redirect reads of all constants to their new locations. */
+       if (!is_identity) {
+               for (struct rc_instruction *inst = c->Program.Instructions.Next;
+                    inst != &c->Program.Instructions; inst = inst->Next) {
+                       rc_remap_registers(inst, remap_regs, inv_remap_table);
+               }
+       }
+
+       /* Set the new constant count. Note that new_count may be less than
+        * Count even though the remapping function is identity. In that case,
+        * the constants have been removed at the end of the array. */
+       c->Program.Constants.Count = new_count;
+
+       /* The caller only needs the table when externals changed position. */
+       if (are_externals_remapped) {
+               *out_remap_table = remap_table;
+       } else {
+               *out_remap_table = NULL;
+               free(remap_table);
+       }
+
+       free(const_used);
+       free(inv_remap_table);
+
+       if (c->Debug & RC_DBG_LOG)
+               rc_constants_print(&c->Program.Constants);
+}
diff --git a/src/gallium/drivers/r300/compiler/radeon_remove_constants.h b/src/gallium/drivers/r300/compiler/radeon_remove_constants.h
new file mode 100644 (file)
index 0000000..f29113b
--- /dev/null
@@ -0,0 +1,35 @@
+/*
+ * Copyright (C) 2010 Marek Olšák <maraeo@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef RADEON_REMOVE_CONSTANTS_H
+#define RADEON_REMOVE_CONSTANTS_H
+
+#include "radeon_compiler.h"
+
+/**
+ * Drop the constants of c's program that no instruction reads and renumber
+ * the remaining ones.
+ *
+ * \param c     compiler whose program is rewritten in place
+ * \param user  pointer to an "unsigned *", passed as void *. It receives
+ *              NULL, or a malloc'ed table with table[new_index] ==
+ *              old_index when an external constant changed its index. The
+ *              function does not free a table it hands out.
+ */
+void rc_remove_unused_constants(struct radeon_compiler *c, void *user);
+
+#endif
diff --git a/src/gallium/drivers/r300/compiler/radeon_rename_regs.c b/src/gallium/drivers/r300/compiler/radeon_rename_regs.c
new file mode 100644 (file)
index 0000000..cafa057
--- /dev/null
@@ -0,0 +1,92 @@
+/*
+ * Copyright 2010 Tom Stellard <tstellar@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/**
+ * \file
+ */
+
+#include "radeon_rename_regs.h"
+
+#include "radeon_compiler.h"
+#include "radeon_dataflow.h"
+#include "radeon_program.h"
+
+/**
+ * Move the program towards SSA form by renaming temporaries: each
+ * instruction that writes a temporary, and whose readers could be
+ * determined, gets a fresh destination register, and all of its readers
+ * are updated to match. Afterwards most registers are written only once,
+ * though a few exceptions remain.
+ *
+ * Programs that contain a BGNLOOP instruction are left untouched.
+ *
+ * All instructions are expected to still be of type
+ * RC_INSTRUCTION_NORMAL when this runs.
+ */
+void rc_rename_regs(struct radeon_compiler *c, void *user)
+{
+       struct rc_instruction * const list_head = &c->Program.Instructions;
+       struct rc_instruction * cur;
+       struct rc_reader_data readers;
+       unsigned char * temps_in_use;
+       unsigned int temps_len;
+
+       /* XXX Remove this once the register allocation works with flow control. */
+       cur = list_head->Next;
+       while (cur != list_head) {
+               if (cur->U.I.Opcode == RC_OPCODE_BGNLOOP) {
+                       return;
+               }
+               cur = cur->Next;
+       }
+
+       /* Usage map of the temporaries, sized at twice the instruction
+        * count returned by rc_recompute_ips(). */
+       temps_len = 2 * rc_recompute_ips(c);
+       temps_in_use = memory_pool_malloc(&c->Pool, sizeof(unsigned char) * temps_len);
+       memset(temps_in_use, 0, sizeof(unsigned char) * temps_len);
+       rc_get_used_temporaries(c, temps_in_use, temps_len);
+
+       for (cur = list_head->Next; cur != list_head; cur = cur->Next) {
+               unsigned int r;
+               int fresh;
+
+               /* Only writes to temporaries are candidates for renaming. */
+               if (cur->U.I.DstReg.File != RC_FILE_TEMPORARY) {
+                       continue;
+               }
+
+               readers.ExitOnAbort = 1;
+               rc_get_readers(c, cur, &readers, NULL, NULL, NULL);
+
+               /* Skip writers whose reader search aborted or found nothing. */
+               if (readers.Abort || !readers.ReaderCount) {
+                       continue;
+               }
+
+               fresh = rc_find_free_temporary_list(c, temps_in_use, temps_len,
+                                               RC_MASK_XYZW);
+               if (fresh < 0) {
+                       rc_error(c, "Ran out of temporary registers\n");
+                       return;
+               }
+
+               /* Point the writer and every reader at the new register. */
+               readers.Writer->U.I.DstReg.Index = fresh;
+               for (r = 0; r < readers.ReaderCount; r++) {
+                       readers.Readers[r].U.I.Src->Index = fresh;
+               }
+       }
+}
diff --git a/src/gallium/drivers/r300/compiler/radeon_rename_regs.h b/src/gallium/drivers/r300/compiler/radeon_rename_regs.h
new file mode 100644 (file)
index 0000000..3baf29f
--- /dev/null
@@ -0,0 +1,9 @@
+
+#ifndef RADEON_RENAME_REGS_H
+#define RADEON_RENAME_REGS_H
+
+struct radeon_compiler;
+
+/**
+ * Rename temporary registers to bring the program of \p c closer to SSA
+ * form. The definition in radeon_rename_regs.c does not read \p user.
+ */
+void rc_rename_regs(struct radeon_compiler *c, void *user);
+
+#endif /* RADEON_RENAME_REGS_H */
diff --git a/src/gallium/drivers/r300/compiler/radeon_swizzle.h b/src/gallium/drivers/r300/compiler/radeon_swizzle.h
new file mode 100644 (file)
index 0000000..c81d5f7
--- /dev/null
@@ -0,0 +1,57 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef RADEON_SWIZZLE_H
+#define RADEON_SWIZZLE_H
+
+#include "radeon_program.h"
+
+/**
+ * Output of the rc_swizzle_caps Split callback.
+ *
+ * NOTE(review): the field meanings are not documented in this header.
+ * Presumably NumPhases counts the valid entries of Phase[] and each entry
+ * describes the channels handled in that phase -- confirm against the
+ * Split implementations.
+ */
+struct rc_swizzle_split {
+       unsigned char NumPhases;
+       unsigned char Phase[4];
+};
+
+/**
+ * Describe the swizzling capability of target hardware.
+ *
+ * This is a table of two callbacks; both receive the source register by
+ * value.
+ */
+struct rc_swizzle_caps {
+       /**
+        * Check whether the given swizzle, absolute and negate combination
+        * can be implemented natively by the hardware for this opcode.
+        *
+        * \param opcode opcode the source register would be used with
+        * \param reg    source register to check
+        * \return 1 if the swizzle is native for the given opcode
+        */
+       int (*IsNative)(rc_opcode opcode, struct rc_src_register reg);
+
+       /**
+        * Determine how to split access to the masked channels of the
+        * given source register to obtain ALU-native swizzles.
+        *
+        * \param reg   source register to split
+        * \param mask  channels of reg that are accessed
+        * \param split receives the result (presumably; the callback
+        *              returns void -- TODO confirm)
+        */
+       void (*Split)(struct rc_src_register reg, unsigned int mask, struct rc_swizzle_split * split);
+};
+
+#endif /* RADEON_SWIZZLE_H */
diff --git a/src/gallium/drivers/r300/compiler/radeon_variable.c b/src/gallium/drivers/r300/compiler/radeon_variable.c
new file mode 100644 (file)
index 0000000..938fb84
--- /dev/null
@@ -0,0 +1,517 @@
+/*
+ * Copyright 2011 Tom Stellard <tstellar@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_variable.h"
+
+#include "memory_pool.h"
+#include "radeon_compiler_util.h"
+#include "radeon_dataflow.h"
+#include "radeon_list.h"
+#include "radeon_opcodes.h"
+#include "radeon_program.h"
+
+/**
+ * Rewrite the index and writemask for the destination register of var
+ * and its friends to new_index and new_writemask.  This function also takes
+ * care of rewriting the swizzles for the sources of var.
+ */
+void rc_variable_change_dst(
+       struct rc_variable * var,
+       unsigned int new_index,
+       unsigned int new_writemask)
+{
+       struct rc_variable * var_ptr;
+       struct rc_list * readers;
+       unsigned int old_mask = rc_variable_writemask_sum(var);
+       unsigned int conversion_swizzle =
+                       rc_make_conversion_swizzle(old_mask, new_writemask);
+
+       for (var_ptr = var; var_ptr; var_ptr = var_ptr->Friend) {
+               if (var_ptr->Inst->Type == RC_INSTRUCTION_NORMAL) {
+                       rc_normal_rewrite_writemask(var_ptr->Inst,
+                                                       conversion_swizzle);
+                       var_ptr->Inst->U.I.DstReg.Index = new_index;
+               } else {
+                       struct rc_pair_sub_instruction * sub;
+                       if (var_ptr->Dst.WriteMask == RC_MASK_W) {
+                               assert(new_writemask & RC_MASK_W);
+                               sub = &var_ptr->Inst->U.P.Alpha;
+                       } else {
+                               sub = &var_ptr->Inst->U.P.RGB;
+                               rc_pair_rewrite_writemask(sub,
+                                                       conversion_swizzle);
+                       }
+                       sub->DestIndex = new_index;
+               }
+       }
+
+       readers = rc_variable_readers_union(var);
+
+       for ( ; readers; readers = readers->Next) {
+               struct rc_reader * reader = readers->Item;
+               if (reader->Inst->Type == RC_INSTRUCTION_NORMAL) {
+                       reader->U.I.Src->Index = new_index;
+                       reader->U.I.Src->Swizzle = rc_rewrite_swizzle(
+                               reader->U.I.Src->Swizzle, conversion_swizzle);
+               } else {
+                       struct rc_pair_instruction * pair_inst =
+                                                       &reader->Inst->U.P;
+                       unsigned int src_type = rc_source_type_swz(
+                                                       reader->U.P.Arg->Swizzle);
+
+                       int src_index = reader->U.P.Arg->Source;
+                       if (src_index == RC_PAIR_PRESUB_SRC) {
+                               src_index = rc_pair_get_src_index(
+                                               pair_inst, reader->U.P.Src);
+                       }
+                       /* Try to delete the old src, it is OK if this fails,
+                        * because rc_pair_alloc_source might be able to
+                        * find a source that can be reused.
+                        */
+                       if (rc_pair_remove_src(reader->Inst, src_type,
+                                                       src_index, old_mask)) {
+                               /* Reuse the source index of the source that
+                                * was just deleted and set its register
+                                * index.  We can't use rc_pair_alloc_source
+                                * for this because it might return a source
+                                * index that is already being used. */
+                               if (src_type & RC_SOURCE_RGB) {
+                                       pair_inst->RGB.Src[src_index]
+                                               .Used = 1;
+                                       pair_inst->RGB.Src[src_index]
+                                               .Index = new_index;
+                                       pair_inst->RGB.Src[src_index]
+                                               .File = RC_FILE_TEMPORARY;
+                               }
+                               if (src_type & RC_SOURCE_ALPHA) {
+                                       pair_inst->Alpha.Src[src_index]
+                                               .Used = 1;
+                                       pair_inst->Alpha.Src[src_index]
+                                               .Index = new_index;
+                                       pair_inst->Alpha.Src[src_index]
+                                               .File = RC_FILE_TEMPORARY;
+                               }
+                       } else {
+                               src_index = rc_pair_alloc_source(
+                                               &reader->Inst->U.P,
+                                               src_type & RC_SOURCE_RGB,
+                                               src_type & RC_SOURCE_ALPHA,
+                                               RC_FILE_TEMPORARY,
+                                               new_index);
+                               if (src_index < 0) {
+                                       rc_error(var->C, "Rewrite of inst %u failed "
+                                               "Can't allocate source for "
+                                               "Inst %u src_type=%x "
+                                               "new_index=%u new_mask=%u\n",
+                                               var->Inst->IP, reader->Inst->IP, src_type, new_index, new_writemask);
+                                               continue;
+                               }
+                       }
+                       reader->U.P.Arg->Swizzle = rc_rewrite_swizzle(
+                               reader->U.P.Arg->Swizzle, conversion_swizzle);
+                       if (reader->U.P.Arg->Source != RC_PAIR_PRESUB_SRC) {
+                               reader->U.P.Arg->Source = src_index;
+                       }
+               }
+       }
+}
+
+/**
+ * Compute the live intervals for var and its friends.
+ */
+void rc_variable_compute_live_intervals(struct rc_variable * var)
+{
+       while(var) {
+               unsigned int i;
+               unsigned int start = var->Inst->IP;
+
+               for (i = 0; i < var->ReaderCount; i++) {
+                       unsigned int chan;
+                       unsigned int chan_start = start;
+                       unsigned int chan_end = var->Readers[i].Inst->IP;
+                       unsigned int mask = var->Readers[i].WriteMask;
+                       struct rc_instruction * inst;
+
+                       /* Extend the live interval of T0 to the start of the
+                        * loop for sequences like:
+                        * BGNLOOP
+                        * read T0
+                        * ...
+                        * write T0
+                        * ENDLOOP
+                        */
+                       if (var->Readers[i].Inst->IP < start) {
+                               struct rc_instruction * bgnloop =
+                                       rc_match_endloop(var->Readers[i].Inst);
+                               chan_start = bgnloop->IP;
+                       }
+
+                       /* Extend the live interval of T0 to the start of the
+                        * loop in case there is a BRK instruction in the loop
+                        * (we don't actually check for a BRK instruction; we
+                        * assume there is one somewhere in the loop, which
+                        * there usually is) for sequences like:
+                        * BGNLOOP
+                        * ...
+                        * conditional BRK
+                        * ...
+                        * write T0
+                        * ENDLOOP
+                        * read T0
+                        ***************************************************
+                        * Extend the live interval of T0 to the end of the
+                        * loop for sequences like:
+                        * write T0
+                        * BGNLOOP
+                        * ...
+                        * read T0
+                        * ENDLOOP
+                        */
+                       for (inst = var->Inst; inst != var->Readers[i].Inst;
+                                                       inst = inst->Next) {
+                               rc_opcode op = rc_get_flow_control_inst(inst);
+                               if (op == RC_OPCODE_ENDLOOP) {
+                                       struct rc_instruction * bgnloop =
+                                               rc_match_endloop(inst);
+                                       if (bgnloop->IP < chan_start) {
+                                               chan_start = bgnloop->IP;
+                                       }
+                               } else if (op == RC_OPCODE_BGNLOOP) {
+                                       struct rc_instruction * endloop =
+                                               rc_match_bgnloop(inst);
+                                       if (endloop->IP > chan_end) {
+                                               chan_end = endloop->IP;
+                                       }
+                               }
+                       }
+
+                       for (chan = 0; chan < 4; chan++) {
+                               if ((mask >> chan) & 0x1) {
+                                       if (!var->Live[chan].Used
+                                       || chan_start < var->Live[chan].Start) {
+                                               var->Live[chan].Start =
+                                                               chan_start;
+                                       }
+                                       if (!var->Live[chan].Used
+                                       || chan_end > var->Live[chan].End) {
+                                               var->Live[chan].End = chan_end;
+                                       }
+                                       var->Live[chan].Used = 1;
+                               }
+                       }
+               }
+               var = var->Friend;
+       }
+}
+
+/**
+ * @return 1 if a and b share a reader
+ * @return 0 if they do not
+ */
+static unsigned int readers_intersect(
+       struct rc_variable * a,
+       struct rc_variable * b)
+{
+       unsigned int a_index, b_index;
+       for (a_index = 0; a_index < a->ReaderCount; a_index++) {
+               struct rc_reader reader_a = a->Readers[a_index];
+               for (b_index = 0; b_index < b->ReaderCount; b_index++) {
+                       struct rc_reader reader_b = b->Readers[b_index];
+                       if (reader_a.Inst->Type == RC_INSTRUCTION_NORMAL
+                               && reader_b.Inst->Type == RC_INSTRUCTION_NORMAL
+                               && reader_a.U.I.Src == reader_b.U.I.Src) {
+
+                               return 1;
+                       }
+                       if (reader_a.Inst->Type == RC_INSTRUCTION_PAIR
+                               && reader_b.Inst->Type == RC_INSTRUCTION_PAIR
+                               && reader_a.U.P.Src == reader_b.U.P.Src) {
+
+                               return 1;
+                       }
+               }
+       }
+       return 0;
+}
+
+void rc_variable_add_friend(
+       struct rc_variable * var,
+       struct rc_variable * friend)
+{
+       assert(var->Dst.Index == friend->Dst.Index);
+       while(var->Friend) {
+               var = var->Friend;
+       }
+       var->Friend = friend;
+}
+
+struct rc_variable * rc_variable(
+       struct radeon_compiler * c,
+       unsigned int DstFile,
+       unsigned int DstIndex,
+       unsigned int DstWriteMask,
+       struct rc_reader_data * reader_data)
+{
+       struct rc_variable * new =
+                       memory_pool_malloc(&c->Pool, sizeof(struct rc_variable));
+       memset(new, 0, sizeof(struct rc_variable));
+       new->C = c;
+       new->Dst.File = DstFile;
+       new->Dst.Index = DstIndex;
+       new->Dst.WriteMask = DstWriteMask;
+       if (reader_data) {
+               new->Inst = reader_data->Writer;
+               new->ReaderCount = reader_data->ReaderCount;
+               new->Readers = reader_data->Readers;
+       }
+       return new;
+}
+
+static void get_variable_helper(
+       struct rc_list ** variable_list,
+       struct rc_variable * variable)
+{
+       struct rc_list * list_ptr;
+       for (list_ptr = *variable_list; list_ptr; list_ptr = list_ptr->Next) {
+               if (readers_intersect(variable, list_ptr->Item)) {
+                       rc_variable_add_friend(list_ptr->Item, variable);
+                       return;
+               }
+       }
+       rc_list_add(variable_list, rc_list(&variable->C->Pool, variable));
+}
+
+static void get_variable_pair_helper(
+       struct rc_list ** variable_list,
+       struct radeon_compiler * c,
+       struct rc_instruction * inst,
+       struct rc_pair_sub_instruction * sub_inst)
+{
+       struct rc_reader_data reader_data;
+       struct rc_variable * new_var;
+       rc_register_file file;
+       unsigned int writemask;
+
+       if (sub_inst->Opcode == RC_OPCODE_NOP) {
+               return;
+       }
+       memset(&reader_data, 0, sizeof(struct rc_reader_data));
+       rc_get_readers_sub(c, inst, sub_inst, &reader_data, NULL, NULL, NULL);
+
+       if (reader_data.ReaderCount == 0) {
+               return;
+       }
+
+       if (sub_inst->WriteMask) {
+               file = RC_FILE_TEMPORARY;
+               writemask = sub_inst->WriteMask;
+       } else if (sub_inst->OutputWriteMask) {
+               file = RC_FILE_OUTPUT;
+               writemask = sub_inst->OutputWriteMask;
+       } else {
+               writemask = 0;
+               file = RC_FILE_NONE;
+       }
+       new_var = rc_variable(c, file, sub_inst->DestIndex, writemask,
+                                                               &reader_data);
+       get_variable_helper(variable_list, new_var);
+}
+
+/**
+ * Generate a list of variables used by the shader program.  Each instruction
+ * that writes to a register is considered a variable.  The struct rc_variable
+ * data structure includes a list of readers and is essentially a
+ * definition-use chain.  Any two variables that share a reader are considered
+ * "friends" and they are linked together via the Friend attribute.
+ */
+struct rc_list * rc_get_variables(struct radeon_compiler * c)
+{
+       struct rc_instruction * inst;
+       struct rc_list * variable_list = NULL;
+
+       for (inst = c->Program.Instructions.Next;
+                                       inst != &c->Program.Instructions;
+                                       inst = inst->Next) {
+               struct rc_reader_data reader_data;
+               struct rc_variable * new_var;
+               memset(&reader_data, 0, sizeof(reader_data));
+
+               if (inst->Type == RC_INSTRUCTION_NORMAL) {
+                       rc_get_readers(c, inst, &reader_data, NULL, NULL, NULL);
+                       if (reader_data.ReaderCount == 0) {
+                               continue;
+                       }
+                       new_var = rc_variable(c, inst->U.I.DstReg.File,
+                               inst->U.I.DstReg.Index,
+                               inst->U.I.DstReg.WriteMask, &reader_data);
+                       get_variable_helper(&variable_list, new_var);
+               } else {
+                       get_variable_pair_helper(&variable_list, c, inst,
+                                                       &inst->U.P.RGB);
+                       get_variable_pair_helper(&variable_list, c, inst,
+                                                       &inst->U.P.Alpha);
+               }
+       }
+
+       return variable_list;
+}
+
+/**
+ * @return The bitwise or of the writemasks of a variable and all of its
+ * friends.
+ */
+unsigned int rc_variable_writemask_sum(struct rc_variable * var)
+{
+       unsigned int writemask = 0;
+       while(var) {
+               writemask |= var->Dst.WriteMask;
+               var = var->Friend;
+       }
+       return writemask;
+}
+
+/**
+ * @return A list of readers for a variable and its friends.  Readers
+ * that read from two different variable friends are only included once in
+ * this list.
+ */
+struct rc_list * rc_variable_readers_union(struct rc_variable * var)
+{
+       struct rc_list * list = NULL;
+       while (var) {
+               unsigned int i;
+               for (i = 0; i < var->ReaderCount; i++) {
+                       struct rc_list * temp;
+                       struct rc_reader * a = &var->Readers[i];
+                       unsigned int match = 0;
+                       for (temp = list; temp; temp = temp->Next) {
+                               struct rc_reader * b = temp->Item;
+                               if (a->Inst->Type != b->Inst->Type) {
+                                       continue;
+                               }
+                               if (a->Inst->Type == RC_INSTRUCTION_NORMAL) {
+                                       if (a->U.I.Src == b->U.I.Src) {
+                                               match = 1;
+                                               break;
+                                       }
+                               }
+                               if (a->Inst->Type == RC_INSTRUCTION_PAIR) {
+                                       if (a->U.P.Arg == b->U.P.Arg
+                                           && a->U.P.Src == b->U.P.Src) {
+                                               match = 1;
+                                               break;
+                                       }
+                               }
+                       }
+                       if (match) {
+                               continue;
+                       }
+                       rc_list_add(&list, rc_list(&var->C->Pool, a));
+               }
+               var = var->Friend;
+       }
+       return list;
+}
+
+static unsigned int reader_equals_src(
+       struct rc_reader reader,
+       unsigned int src_type,
+       void * src)
+{
+       if (reader.Inst->Type != src_type) {
+               return 0;
+       }
+       if (src_type == RC_INSTRUCTION_NORMAL) {
+               return reader.U.I.Src == src;
+       } else {
+               return reader.U.P.Src == src;
+       }
+}
+
+static unsigned int variable_writes_src(
+       struct rc_variable * var,
+       unsigned int src_type,
+       void * src)
+{
+       unsigned int i;
+       for (i = 0; i < var->ReaderCount; i++) {
+               if (reader_equals_src(var->Readers[i], src_type, src)) {
+                       return 1;
+               }
+       }
+       return 0;
+}
+
+
+struct rc_list * rc_variable_list_get_writers(
+       struct rc_list * var_list,
+       unsigned int src_type,
+       void * src)
+{
+       struct rc_list * list_ptr;
+       struct rc_list * writer_list = NULL;
+       for (list_ptr = var_list; list_ptr; list_ptr = list_ptr->Next) {
+               struct rc_variable * var = list_ptr->Item;
+               if (variable_writes_src(var, src_type, src)) {
+                       struct rc_variable * friend;
+                       rc_list_add(&writer_list, rc_list(&var->C->Pool, var));
+                       for (friend = var->Friend; friend;
+                                               friend = friend->Friend) {
+                               if (variable_writes_src(friend, src_type, src)) {
+                                       rc_list_add(&writer_list,
+                                               rc_list(&var->C->Pool, friend));
+                               }
+                       }
+                       /* Once we have identified the variable and its
+                        * friends that write this source, we can stop
+                        * searching, because we know none of the
+                        * other variables in the list will write this source.
+                        * If they did they would be friends of var.
+                        */
+                       break;
+               }
+       }
+       return writer_list;
+}
+
+void rc_variable_print(struct rc_variable * var)
+{
+       unsigned int i;
+       while (var) {
+               fprintf(stderr, "%u: TEMP[%u].%u: ",
+                       var->Inst->IP, var->Dst.Index, var->Dst.WriteMask);
+               for (i = 0; i < 4; i++) {
+                       fprintf(stderr, "chan %u: start=%u end=%u ", i,
+                                       var->Live[i].Start, var->Live[i].End);
+               }
+               fprintf(stderr, "%u readers\n", var->ReaderCount);
+               if (var->Friend) {
+                       fprintf(stderr, "Friend: \n\t");
+               }
+               var = var->Friend;
+       }
+}
diff --git a/src/gallium/drivers/r300/compiler/radeon_variable.h b/src/gallium/drivers/r300/compiler/radeon_variable.h
new file mode 100644 (file)
index 0000000..9427bee
--- /dev/null
@@ -0,0 +1,89 @@
+/*
+ * Copyright 2011 Tom Stellard <tstellar@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef RADEON_VARIABLE_H
+#define RADEON_VARIABLE_H
+
+#include "radeon_compiler.h"
+
+struct radeon_compiler;
+struct rc_list;
+struct rc_reader_data;
+struct rc_readers;
+
+struct live_intervals {
+       int Start;
+       int End;
+       int Used;
+};
+
+struct rc_variable {
+       struct radeon_compiler * C;
+       struct rc_dst_register Dst;
+
+       struct rc_instruction * Inst;
+       unsigned int ReaderCount;
+       struct rc_reader * Readers;
+       struct live_intervals Live[4];
+
+       /* A friend is a variable that shares a reader with another variable.
+        */
+       struct rc_variable * Friend;
+};
+
+void rc_variable_change_dst(
+       struct rc_variable * var,
+       unsigned int new_index,
+       unsigned int new_writemask);
+
+void rc_variable_compute_live_intervals(struct rc_variable * var);
+
+void rc_variable_add_friend(
+       struct rc_variable * var,
+       struct rc_variable * friend);
+
+struct rc_variable * rc_variable(
+       struct radeon_compiler * c,
+       unsigned int DstFile,
+       unsigned int DstIndex,
+       unsigned int DstWriteMask,
+       struct rc_reader_data * reader_data);
+
+struct rc_list * rc_get_variables(struct radeon_compiler * c);
+
+unsigned int rc_variable_writemask_sum(struct rc_variable * var);
+
+struct rc_list * rc_variable_readers_union(struct rc_variable * var);
+
+struct rc_list * rc_variable_list_get_writers(
+       struct rc_list * var_list,
+       unsigned int src_type,
+       void * src);
+
+void rc_variable_print(struct rc_variable * var);
+
+#endif /* RADEON_VARIABLE_H */
diff --git a/src/gallium/drivers/r300/compiler/tests/.gitignore b/src/gallium/drivers/r300/compiler/tests/.gitignore
new file mode 100644 (file)
index 0000000..85672fe
--- /dev/null
@@ -0,0 +1 @@
+radeon_compiler_util_tests
diff --git a/src/gallium/drivers/r300/compiler/tests/Makefile b/src/gallium/drivers/r300/compiler/tests/Makefile
new file mode 100644 (file)
index 0000000..6eda34a
--- /dev/null
@@ -0,0 +1,53 @@
+TOP = ../../../../../..
+include $(TOP)/configs/current
+
+CFLAGS += -Wall -Werror
+
+### Basic defines ###
+TESTS =        radeon_compiler_util_tests
+
+TEST_SOURCES := $(TESTS:=.c)
+
+SHARED_SOURCES =               \
+       rc_test_helpers.c       \
+       unit_test.c
+
+C_SOURCES = $(SHARED_SOURCES) $(TEST_SOURCES)
+
+INCLUDES = \
+       -I. \
+       -I..
+
+COMPILER_LIB = ../../libr300.a
+
+##### TARGETS #####
+
+default: depend run_tests
+
+depend: $(C_SOURCES)
+       rm -f depend
+       touch depend
+       $(MKDEP) $(MKDEP_OPTIONS) $(INCLUDES) $^ 2> /dev/null
+
+# Remove test binaries and dependency files
+clean:
+       rm -f $(TESTS) depend depend.bak
+
+$(TESTS): $(TESTS:=.o) $(SHARED_SOURCES:.c=.o) $(COMPILER_LIB)
+       $(APP_CC) -o $@ $^
+
+run_tests: $(TESTS)
+       @echo "RUNNING TESTS:"
+       @echo ""
+       $(foreach test, $^, @./$(test))
+
+.PHONY: $(COMPILER_LIB)
+$(COMPILER_LIB):
+       $(MAKE) -C ../..
+
+##### RULES #####
+.c.o:
+       $(CC) -c $(INCLUDES) $(CFLAGS) $(LIBRARY_DEFINES) $< -o $@
+
+
+sinclude depend
diff --git a/src/gallium/drivers/r300/compiler/tests/radeon_compiler_util_tests.c b/src/gallium/drivers/r300/compiler/tests/radeon_compiler_util_tests.c
new file mode 100644 (file)
index 0000000..a2e3f2a
--- /dev/null
@@ -0,0 +1,76 @@
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+
+#include "radeon_compiler_util.h"
+#include "radeon_program.h"
+
+#include "rc_test_helpers.h"
+#include "unit_test.h"
+
+static void test_rc_inst_can_use_presub(
+       struct test_result * result,
+       int expected,
+       const char * add_str,
+       const char * replace_str)
+{
+       struct rc_instruction add_inst, replace_inst;
+       int ret;
+
+       test_begin(result);
+       init_rc_normal_instruction(&add_inst, add_str);
+       init_rc_normal_instruction(&replace_inst, replace_str);
+
+       ret = rc_inst_can_use_presub(&replace_inst, RC_PRESUB_ADD, 0,
+                       &replace_inst.U.I.SrcReg[0],
+                       &add_inst.U.I.SrcReg[0], &add_inst.U.I.SrcReg[1]);
+
+       test_check(result, ret == expected);
+}
+
+static void test_runner_rc_inst_can_use_presub(struct test_result * result)
+{
+
+       /* This tests the case where the source being replaced has the same
+        * register file and register index as another source register in the
+        * CMP instruction.  A previous version of this function was ignoring
+        * all registers that shared the same file and index as the replacement
+        * register when counting the number of source selects.
+        *
+        * https://bugs.freedesktop.org/show_bug.cgi?id=36527
+        */
+       test_rc_inst_can_use_presub(result, 0,
+               "ADD temp[0].z, temp[6].__x_, const[1].__x_;",
+               "CMP temp[0].y, temp[0]._z__, const[0]._z__, temp[0]._y__;");
+
+
+       /* Testing a random case that should fail
+        *
+        * https://bugs.freedesktop.org/show_bug.cgi?id=36527
+        */
+       test_rc_inst_can_use_presub(result, 0,
+               "ADD temp[3], temp[1], temp[2];",
+               "MAD temp[1], temp[0], const[0].xxxx, -temp[3];");
+
+       /* This tests the case where the arguments of the ADD
+        * instruction share the same register file and index.  Normally, we
+        * would need only one source select for these two arguments, but since
+        * they will be part of a presubtract operation we need to use the two
+        * source selects that the presubtract instruction expects
+        * (src0 and src1).
+        *
+        * https://bugs.freedesktop.org/show_bug.cgi?id=36527
+        */
+       test_rc_inst_can_use_presub(result, 0,
+               "ADD temp[3].x, temp[0].x___, temp[0].x___;",
+               "MAD temp[0].xyz, temp[2].xyz_, -temp[3].xxx_, input[5].xyz_;");
+}
+
+int main(int argc, char ** argv)
+{
+       struct test tests[] = {
+               {"rc_inst_can_use_presub()", test_runner_rc_inst_can_use_presub},
+               {NULL, NULL}
+       };
+       run_tests(tests);
+}
diff --git a/src/gallium/drivers/r300/compiler/tests/rc_test_helpers.c b/src/gallium/drivers/r300/compiler/tests/rc_test_helpers.c
new file mode 100644 (file)
index 0000000..ca4738a
--- /dev/null
@@ -0,0 +1,380 @@
+#include <errno.h>
+#include <regex.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/types.h>
+
+#include "../radeon_compiler_util.h"
+#include "../radeon_opcodes.h"
+#include "../radeon_program.h"
+
+#include "rc_test_helpers.h"
+
+/* This file contains some helper functions for filling out the rc_instruction
+ * data structures.  These functions take a string as input based on the format
+ * output by rc_program_print().
+ */
+
+#define VERBOSE 0
+
+#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)
+
+#define REGEX_ERR_BUF_SIZE 50
+
+/* Describes one regex sub-match as a (pointer, length) pair.  The text is not
+ * copied and is not NUL-terminated at Length. */
+struct match_info {
+       const char * String; /* Points into the searched string at the match start. */
+       int Length;          /* Number of characters covered by the match. */
+};
+
+/* Number of characters spanned by sub-match number index of matches. */
+static int match_length(regmatch_t * matches, int index)
+{
+       const regmatch_t * m = &matches[index];
+
+       return m->rm_eo - m->rm_so;
+}
+
+/**
+ * Compile regex_str as a POSIX extended regular expression and execute it
+ * once against search_str.
+ *
+ * @param regex_str The pattern to compile.
+ * @param search_str The string to search.
+ * @param matches Array that receives the sub-match offsets.
+ * @param num_matches Number of entries available in matches.
+ * @return 1 if the pattern compiled and matched, 0 otherwise (a message is
+ * printed to stderr in that case).
+ */
+static int regex_helper(
+       const char * regex_str,
+       const char * search_str,
+       regmatch_t * matches,
+       int num_matches)
+{
+       char err_buf[REGEX_ERR_BUF_SIZE];
+       regex_t regex;
+       int err_code;
+       unsigned int i;
+
+       err_code = regcomp(&regex, regex_str, REG_EXTENDED);
+       if (err_code) {
+               regerror(err_code, &regex, err_buf, REGEX_ERR_BUF_SIZE);
+               fprintf(stderr, "Failed to compile regex: %s\n", err_buf);
+               return 0;
+       }
+
+       err_code = regexec(&regex, search_str, num_matches, matches, 0);
+       DBG("Search string: '%s'\n", search_str);
+       if (err_code) {
+               regerror(err_code, &regex, err_buf, REGEX_ERR_BUF_SIZE);
+               fprintf(stderr, "Failed to match regex: %s\n", err_buf);
+               /* The compiled pattern owns memory; release it on every
+                * path once regcomp() has succeeded. */
+               regfree(&regex);
+               return 0;
+       }
+
+       /* Only dump the offsets after a successful match; they are not
+        * meaningful when regexec() fails. */
+       for (i = 0; i < (unsigned int)num_matches; i++) {
+               DBG("Match %u start = %d end = %d\n", i,
+                                       matches[i].rm_so, matches[i].rm_eo);
+       }
+
+       regfree(&regex);
+       return 1;
+}
+
+#define REGEX_SRC_MATCHES 6
+
+/* The pieces of a source register string, one per capture group of the regex
+ * used by init_rc_normal_src(). */
+struct src_tokens {
+       struct match_info Negate;  /* Leading '-' characters, if any. */
+       struct match_info Abs;     /* Leading '|' characters, if any. */
+       struct match_info File;    /* Register file name, e.g. "temp". */
+       struct match_info Index;   /* Digits between '[' and ']'. */
+       struct match_info Swizzle; /* Text following ']', starting with '.'. */
+};
+
+/**
+ * Return 1 if the text covered by token is exactly word, 0 otherwise.
+ */
+static int src_token_is(const struct match_info * token, const char * word)
+{
+       size_t word_len = strlen(word);
+
+       return token->Length >= 0 && (size_t)token->Length == word_len
+               && !strncmp(token->String, word, word_len);
+}
+
+/**
+ * Initialize the source register at index src_index for the instruction based
+ * on src_str.
+ *
+ * The accepted form is [-][|]file[index][.swizzle], where file is one of
+ * "temp", "input", "const" or "none", index is one or more decimal digits and
+ * swizzle is four components taken from x, y, z, w, 0, 1, H and _, each
+ * optionally preceded by '-' to negate that component.  Without a swizzle the
+ * register gets RC_SWIZZLE_XYZW.
+ *
+ * The Negate and Abs fields are only ever set, never cleared, so the
+ * instruction should be zeroed beforehand (init_rc_normal_instruction() does
+ * this).
+ *
+ * NOTE: Warning in init_rc_normal_instruction() applies to this function as
+ * well.
+ *
+ * @param inst The instruction whose source register is filled in.
+ * @param src_index Index of the source register within the instruction.
+ * @param src_str A string that represents the source register.  The format for
+ * this string is the same that is output by rc_program_print.
+ * @return 1 On success, 0 on failure
+ */
+int init_rc_normal_src(
+       struct rc_instruction * inst,
+       unsigned int src_index,
+       const char * src_str)
+{
+       /* The index group uses '+' so that multi-digit indices are accepted. */
+       const char * regex_str = "(-*)(\\|*)([[:lower:]]*)\\[([[:digit:]]+)\\](\\.*[[:lower:]-]*)";
+       regmatch_t matches[REGEX_SRC_MATCHES];
+       struct src_tokens tokens;
+       struct rc_src_register * src_reg = &inst->U.I.SrcReg[src_index];
+       unsigned int i;
+
+       /* Execute the regex */
+       if (!regex_helper(regex_str, src_str, matches, REGEX_SRC_MATCHES)) {
+               fprintf(stderr, "Failed to execute regex for src register.\n");
+               return 0;
+       }
+
+       /* Create Tokens */
+       tokens.Negate.String = src_str + matches[1].rm_so;
+       tokens.Negate.Length = match_length(matches, 1);
+       tokens.Abs.String = src_str + matches[2].rm_so;
+       tokens.Abs.Length = match_length(matches, 2);
+       tokens.File.String = src_str + matches[3].rm_so;
+       tokens.File.Length = match_length(matches, 3);
+       tokens.Index.String = src_str + matches[4].rm_so;
+       tokens.Index.Length = match_length(matches, 4);
+       tokens.Swizzle.String = src_str + matches[5].rm_so;
+       tokens.Swizzle.Length = match_length(matches, 5);
+
+       /* Negate */
+       if (tokens.Negate.Length  > 0) {
+               src_reg->Negate = RC_MASK_XYZW;
+       }
+
+       /* Abs */
+       if (tokens.Abs.Length > 0) {
+               src_reg->Abs = 1;
+       }
+
+       /* File: the whole token has to match, so that an empty or truncated
+        * name is not mistaken for "temp". */
+       if (src_token_is(&tokens.File, "temp")) {
+               src_reg->File = RC_FILE_TEMPORARY;
+       } else if (src_token_is(&tokens.File, "input")) {
+               src_reg->File = RC_FILE_INPUT;
+       } else if (src_token_is(&tokens.File, "const")) {
+               src_reg->File = RC_FILE_CONSTANT;
+       } else if (src_token_is(&tokens.File, "none")) {
+               src_reg->File = RC_FILE_NONE;
+       } else {
+               fprintf(stderr, "Unknown src register file type.\n");
+               return 0;
+       }
+
+       /* Index: strtol() stops at the closing ']'. */
+       errno = 0;
+       src_reg->Index = strtol(tokens.Index.String, NULL, 10);
+       if (errno > 0) {
+               fprintf(stderr, "Could not convert src register index.\n");
+               return 0;
+       }
+
+       /* Swizzle */
+       if (tokens.Swizzle.Length == 0) {
+               src_reg->Swizzle = RC_SWIZZLE_XYZW;
+       } else {
+               int str_index = 1;
+               src_reg->Swizzle = RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_UNUSED);
+               if (tokens.Swizzle.String[0] != '.') {
+                       fprintf(stderr, "First char of swizzle is not valid.\n");
+                       return 0;
+               }
+               /* The components are read straight from src_str rather than
+                * from the regex match, whose character class does not cover
+                * '_', '0', '1' or 'H'.  A string that ends early hits the
+                * terminating NUL and is rejected by the default case. */
+               for (i = 0; i < 4; i++, str_index++) {
+                       if (tokens.Swizzle.String[str_index] == '-') {
+                               src_reg->Negate |= (1 << i);
+                               str_index++;
+                       }
+                       switch(tokens.Swizzle.String[str_index]) {
+                       case 'x':
+                               SET_SWZ(src_reg->Swizzle, i, RC_SWIZZLE_X);
+                               break;
+                       case 'y':
+                               SET_SWZ(src_reg->Swizzle, i, RC_SWIZZLE_Y);
+                               break;
+                       case 'z':
+                               SET_SWZ(src_reg->Swizzle, i, RC_SWIZZLE_Z);
+                               break;
+                       case 'w':
+                               SET_SWZ(src_reg->Swizzle, i, RC_SWIZZLE_W);
+                               break;
+                       case '1':
+                               SET_SWZ(src_reg->Swizzle, i, RC_SWIZZLE_ONE);
+                               break;
+                       case '0':
+                               SET_SWZ(src_reg->Swizzle, i, RC_SWIZZLE_ZERO);
+                               break;
+                       case 'H':
+                               SET_SWZ(src_reg->Swizzle, i, RC_SWIZZLE_HALF);
+                               break;
+                       case '_':
+                               SET_SWZ(src_reg->Swizzle, i, RC_SWIZZLE_UNUSED);
+                               break;
+                       default:
+                               fprintf(stderr, "Unknown src register swizzle.\n");
+                               return 0;
+                       }
+               }
+       }
+       DBG("File=%u index=%u swizzle=%x negate=%u abs=%u\n",
+                       src_reg->File, src_reg->Index, src_reg->Swizzle,
+                       src_reg->Negate, src_reg->Abs);
+       return 1;
+}
+
+#define REGEX_DST_MATCHES 4
+
+/* The pieces of a destination register string, one per capture group of the
+ * regex used by init_rc_normal_dst(). */
+struct dst_tokens {
+       struct match_info File;      /* Register file name, e.g. "temp". */
+       struct match_info Index;     /* Digits between '[' and ']'. */
+       struct match_info WriteMask; /* Text following ']', starting with '.'. */
+};
+
+/**
+ * Return 1 if the text covered by token is exactly word, 0 otherwise.
+ */
+static int dst_token_is(const struct match_info * token, const char * word)
+{
+       size_t word_len = strlen(word);
+
+       return token->Length >= 0 && (size_t)token->Length == word_len
+               && !strncmp(token->String, word, word_len);
+}
+
+/**
+ * Initialize the destination for the instruction based on dst_str.
+ *
+ * The accepted form is file[index][.mask], where file is "temp" or "output",
+ * index is one or more decimal digits and mask is any combination of the
+ * letters x, y, z and w.  Without a mask all four components are written.
+ *
+ * NOTE: Warning in init_rc_normal_instruction() applies to this function as
+ * well.
+ *
+ * @param inst The instruction whose destination register is filled in.
+ * @param dst_str A string that represents the destination register.  The format
+ * for this string is the same that is output by rc_program_print.
+ * @return 1 On success, 0 on failure
+ */
+int init_rc_normal_dst(
+       struct rc_instruction * inst,
+       const char * dst_str)
+{
+       /* The index group uses '+' so that an empty index "[]" is rejected. */
+       const char * regex_str = "([[:lower:]]*)\\[([[:digit:]]+)\\](\\.*[[:lower:]]*)";
+       regmatch_t matches[REGEX_DST_MATCHES];
+       struct dst_tokens tokens;
+       int i;
+
+       /* Execute the regex */
+       if (!regex_helper(regex_str, dst_str, matches, REGEX_DST_MATCHES)) {
+               fprintf(stderr, "Failed to execute regex for dst register.\n");
+               return 0;
+       }
+
+       /* Create Tokens */
+       tokens.File.String = dst_str + matches[1].rm_so;
+       tokens.File.Length = match_length(matches, 1);
+       tokens.Index.String = dst_str + matches[2].rm_so;
+       tokens.Index.Length = match_length(matches, 2);
+       tokens.WriteMask.String = dst_str + matches[3].rm_so;
+       tokens.WriteMask.Length = match_length(matches, 3);
+
+       /* File Type: the whole token has to match, so that an empty or
+        * truncated name is not mistaken for "temp". */
+       if (dst_token_is(&tokens.File, "temp")) {
+               inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
+       } else if (dst_token_is(&tokens.File, "output")) {
+               inst->U.I.DstReg.File = RC_FILE_OUTPUT;
+       } else {
+               fprintf(stderr, "Unknown dst register file type.\n");
+               return 0;
+       }
+
+       /* File Index: strtol() stops at the closing ']'. */
+       errno = 0;
+       inst->U.I.DstReg.Index = strtol(tokens.Index.String, NULL, 10);
+
+       if (errno > 0) {
+               fprintf(stderr, "Could not convert dst register index\n");
+               return 0;
+       }
+
+       /* WriteMask */
+       if (tokens.WriteMask.Length == 0) {
+               inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
+       } else {
+               /* The first character should be '.' */
+               if (tokens.WriteMask.String[0] != '.') {
+                       fprintf(stderr, "1st char of writemask is not valid.\n");
+                       return 0;
+               }
+               /* Start from an empty mask so the result depends only on
+                * dst_str and not on what the instruction held before. */
+               inst->U.I.DstReg.WriteMask = 0;
+               for (i = 1; i < tokens.WriteMask.Length; i++) {
+                       switch(tokens.WriteMask.String[i]) {
+                       case 'x':
+                               inst->U.I.DstReg.WriteMask |= RC_MASK_X;
+                               break;
+                       case 'y':
+                               inst->U.I.DstReg.WriteMask |= RC_MASK_Y;
+                               break;
+                       case 'z':
+                               inst->U.I.DstReg.WriteMask |= RC_MASK_Z;
+                               break;
+                       case 'w':
+                               inst->U.I.DstReg.WriteMask |= RC_MASK_W;
+                               break;
+                       default:
+                               fprintf(stderr, "Unknown swizzle in writemask.\n");
+                               return 0;
+                       }
+               }
+       }
+       DBG("Dst Reg File=%u Index=%d Writemask=%d\n",
+                       inst->U.I.DstReg.File,
+                       inst->U.I.DstReg.Index,
+                       inst->U.I.DstReg.WriteMask);
+       return 1;
+}
+
+#define REGEX_INST_MATCHES 7
+
+/* The pieces of an instruction string as located by the regex used in
+ * init_rc_normal_instruction(). */
+struct inst_tokens {
+       struct match_info Opcode;  /* Upper-case opcode name. */
+       struct match_info Sat;     /* "_SAT" suffix; Length stays 0 if absent. */
+       struct match_info Dst;     /* Destination register text, if the opcode has one. */
+       struct match_info Srcs[3]; /* Source register texts, in order. */
+};
+
+/**
+ * Return a freshly allocated, NUL-terminated copy of the text described by
+ * token, or NULL if the allocation fails.  The caller must free() the result.
+ */
+static char * dup_inst_token(const struct match_info * token)
+{
+       char * copy = malloc(sizeof(char) * (token->Length + 1));
+
+       if (!copy) {
+               fprintf(stderr, "Out of memory while copying token.\n");
+               return NULL;
+       }
+       memcpy(copy, token->String, token->Length);
+       copy[token->Length] = '\0';
+       return copy;
+}
+
+/**
+ * Initialize a normal instruction based on inst_str.
+ *
+ * The instruction is zeroed first, then the opcode, the destination register
+ * (if the opcode has one) and the source registers are filled in from the
+ * string.
+ *
+ * WARNING: This function might not be able to handle every kind of format that
+ * rc_program_print() can output.  If you are having problems with a
+ * particular string, you may need to add support for it to this function.
+ *
+ * NOTE: A "_SAT" suffix is recognized by the parser, but it is not applied to
+ * the instruction.
+ *
+ * @param inst The instruction to initialize.
+ * @param inst_str A string that represents the instruction.  The format for
+ * this string is the same that is output by rc_program_print.
+ * @return 1 On success, 0 on failure
+ */
+int init_rc_normal_instruction(
+       struct rc_instruction * inst,
+       const char * inst_str)
+{
+       const char * regex_str = "([[:upper:]]+)(_SAT)* ([^,]*)[, ]*([^,]*)[, ]*([^,]*)[, ]*([^;]*)";
+       int i;
+       regmatch_t matches[REGEX_INST_MATCHES];
+       struct inst_tokens tokens;
+
+       /* Initialize inst */
+       memset(inst, 0, sizeof(struct rc_instruction));
+       inst->Type = RC_INSTRUCTION_NORMAL;
+
+       /* Execute the regex */
+       if (!regex_helper(regex_str, inst_str, matches, REGEX_INST_MATCHES)) {
+               return 0;
+       }
+       memset(&tokens, 0, sizeof(tokens));
+
+       /* Create Tokens */
+       tokens.Opcode.String = inst_str + matches[1].rm_so;
+       tokens.Opcode.Length = match_length(matches, 1);
+       if (matches[2].rm_so > -1) {
+               tokens.Sat.String = inst_str + matches[2].rm_so;
+               tokens.Sat.Length = match_length(matches, 2);
+       }
+
+
+       /* Fill out the rest of the instruction. */
+       for (i = 0; i < MAX_RC_OPCODE; i++) {
+               const struct rc_opcode_info * info = rc_get_opcode_info(i);
+               /* Capture group holding the first source; it moves up by one
+                * when the opcode has a destination register. */
+               unsigned int first_src = 3;
+               unsigned int j;
+               if (strncmp(tokens.Opcode.String, info->Name, tokens.Opcode.Length)) {
+                       continue;
+               }
+               inst->U.I.Opcode = info->Opcode;
+               if (info->HasDstReg) {
+                       char * dst_str;
+                       int dst_ok;
+                       tokens.Dst.String = inst_str + matches[3].rm_so;
+                       tokens.Dst.Length = match_length(matches, 3);
+                       first_src++;
+
+                       dst_str = dup_inst_token(&tokens.Dst);
+                       if (!dst_str) {
+                               return 0;
+                       }
+                       dst_ok = init_rc_normal_dst(inst, dst_str);
+                       free(dst_str);
+                       if (!dst_ok) {
+                               return 0;
+                       }
+               }
+               for (j = 0; j < info->NumSrcRegs; j++) {
+                       char * src_str;
+                       int src_ok;
+                       tokens.Srcs[j].String =
+                               inst_str + matches[first_src + j].rm_so;
+                       tokens.Srcs[j].Length =
+                               match_length(matches, first_src + j);
+
+                       src_str = dup_inst_token(&tokens.Srcs[j]);
+                       if (!src_str) {
+                               return 0;
+                       }
+                       src_ok = init_rc_normal_src(inst, j, src_str);
+                       /* The copy is only needed while parsing. */
+                       free(src_str);
+                       if (!src_ok) {
+                               return 0;
+                       }
+               }
+               return 1;
+       }
+
+       /* No opcode name matched the string. */
+       fprintf(stderr, "Unknown opcode.\n");
+       return 0;
+}
diff --git a/src/gallium/drivers/r300/compiler/tests/rc_test_helpers.h b/src/gallium/drivers/r300/compiler/tests/rc_test_helpers.h
new file mode 100644 (file)
index 0000000..1a6bf96
--- /dev/null
@@ -0,0 +1,13 @@
+
+#ifndef RC_TEST_HELPERS_H
+#define RC_TEST_HELPERS_H
+
+/* Forward declaration so this header can be included before the header that
+ * defines the structure. */
+struct rc_instruction;
+
+/* Fill in source register src_index of inst from src_str.
+ * Returns 1 on success, 0 on failure. */
+int init_rc_normal_src(
+       struct rc_instruction * inst,
+       unsigned int src_index,
+       const char * src_str);
+
+/* Fill in the destination register of inst from dst_str.
+ * Returns 1 on success, 0 on failure. */
+int init_rc_normal_dst(
+       struct rc_instruction * inst,
+       const char * dst_str);
+
+/* Initialize inst as a normal instruction described by inst_str.
+ * Returns 1 on success, 0 on failure. */
+int init_rc_normal_instruction(
+       struct rc_instruction * inst,
+       const char * inst_str);
+
+#endif /* RC_TEST_HELPERS_H */
diff --git a/src/gallium/drivers/r300/compiler/tests/unit_test.c b/src/gallium/drivers/r300/compiler/tests/unit_test.c
new file mode 100644 (file)
index 0000000..266f336
--- /dev/null
@@ -0,0 +1,35 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "unit_test.h"
+
+/**
+ * Run every test in tests, in order, stopping at the first entry whose name
+ * is NULL.
+ *
+ * For each test the result counters are zeroed, the test function is called
+ * with a pointer to them, and a summary line with the number of passed
+ * subtests and the total number of subtests is printed to stdout.
+ *
+ * @param tests Array of tests terminated by an entry with a NULL name.
+ */
+void run_tests(struct test tests[])
+{
+       int i;
+       for (i = 0; tests[i].name; i++) {
+               printf("Test %s\n", tests[i].name);
+               memset(&tests[i].result, 0, sizeof(tests[i].result));
+               tests[i].test_func(&tests[i].result);
+               /* The counters are unsigned int, so they are printed with %u. */
+               printf("Test %s (%u/%u) pass\n", tests[i].name,
+                       tests[i].result.pass, tests[i].result.test_count);
+       }
+}
+
+/* Record the start of a new subtest by bumping the subtest counter. */
+void test_begin(struct test_result * result)
+{
+       result->test_count += 1;
+}
+
+/* Record the outcome of the current subtest: a non-zero cond counts as a
+ * pass, zero as a fail.  One line with the subtest number and the verdict is
+ * printed to stdout. */
+void test_check(struct test_result * result, int cond)
+{
+       const char * verdict;
+
+       if (!cond) {
+               result->fail++;
+               verdict = "Fail";
+       } else {
+               result->pass++;
+               verdict = "Pass";
+       }
+       printf("Subtest %u -> %s\n", result->test_count, verdict);
+}
diff --git a/src/gallium/drivers/r300/compiler/tests/unit_test.h b/src/gallium/drivers/r300/compiler/tests/unit_test.h
new file mode 100644 (file)
index 0000000..441e8b6
--- /dev/null
@@ -0,0 +1,17 @@
+
+#ifndef UNIT_TEST_H
+#define UNIT_TEST_H
+
+/* Counters collected while one test runs. */
+struct test_result {
+       unsigned int test_count; /* Subtests started, see test_begin(). */
+       unsigned int pass;       /* Subtests whose check succeeded. */
+       unsigned int fail;       /* Subtests whose check failed. */
+};
+
+/* One entry of the table handed to run_tests(). */
+struct test {
+       const char * name;       /* Printed name; NULL terminates the table. */
+       void (*test_func)(struct test_result * result); /* Runs the subtests. */
+       struct test_result result; /* Zeroed by run_tests() before the test runs. */
+};
+
+/* Run all tests of a NULL-name terminated table and print their results. */
+void run_tests(struct test tests[]);
+
+/* Count the start of a new subtest. */
+void test_begin(struct test_result * result);
+/* Record and print whether the current subtest passed (cond != 0). */
+void test_check(struct test_result * result, int cond);
+
+#endif /* UNIT_TEST_H */
index 6c1c9d2fb13b791a95c116ea00a79875c1ea1c8e..234e043b0712fad3be43ca5717afdb7f1048feab 100644 (file)
@@ -24,7 +24,6 @@
 #define R300_EMIT_H
 
 #include "r300_context.h"
-#include "radeon_code.h"
 
 struct rX00_fragment_program_code;
 struct r300_vertex_program_code;
index e3a1bc4a0f4e7b8894d7a7e4ecce276ccc7cd918..a9fd3ad40dddfc715d89a383b62a3ce46ae1182c 100644 (file)
@@ -38,8 +38,7 @@
 #include "r300_texture.h"
 #include "r300_tgsi_to_rc.h"
 
-#include "radeon_code.h"
-#include "radeon_compiler.h"
+#include "compiler/radeon_compiler.h"
 
 /* Convert info about FS input semantics to r300_shader_semantics. */
 void r300_shader_read_fs_inputs(struct tgsi_shader_info* info,
index c86a90b85ae40059ff5fec747d066d3673bebc3c..45c9e8801c3d1cbbe965061a241ab691ae3fa725 100644 (file)
@@ -27,7 +27,7 @@
 
 #include "pipe/p_state.h"
 #include "tgsi/tgsi_scan.h"
-#include "radeon_code.h"
+#include "compiler/radeon_code.h"
 #include "r300_shader_semantics.h"
 
 struct r300_fragment_shader_code {
index bb30b1ab0beb8733e404c42e44c3f421170f7708..5edbb22a7437476df82d55bb0f140b7071bf778c 100644 (file)
@@ -2078,7 +2078,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
 #       define R300_ALU_OUTC_D2A                (3 << 23)
 #       define R300_ALU_OUTC_MIN                (4 << 23)
 #       define R300_ALU_OUTC_MAX                (5 << 23)
-#       define R300_ALU_OUTC_CMPH               (7 << 23)
+#       define R300_ALU_OUTC_CND                (7 << 23)
 #       define R300_ALU_OUTC_CMP                (8 << 23)
 #       define R300_ALU_OUTC_FRC                (9 << 23)
 #       define R300_ALU_OUTC_REPL_ALPHA         (10 << 23)
@@ -2944,6 +2944,23 @@ enum {
 
 /*\}*/
 
+/* Build a PVS destination operand: each argument is masked and shifted into
+ * its field.  Arguments are parenthesized so that expressions can be passed. */
+#define PVS_OP_DST_OPERAND(opcode, math_inst, macro_inst, reg_index, reg_writemask, reg_class) \
+        ((((opcode) & PVS_DST_OPCODE_MASK) << PVS_DST_OPCODE_SHIFT)    \
+        | (((math_inst) & PVS_DST_MATH_INST_MASK) << PVS_DST_MATH_INST_SHIFT)  \
+        | (((macro_inst) & PVS_DST_MACRO_INST_MASK) << PVS_DST_MACRO_INST_SHIFT)       \
+        | (((reg_index) & PVS_DST_OFFSET_MASK) << PVS_DST_OFFSET_SHIFT)        \
+        | (((reg_writemask) & 0xf) << PVS_DST_WE_X_SHIFT)      /* X Y Z W */   \
+        | (((reg_class) & PVS_DST_REG_TYPE_MASK) << PVS_DST_REG_TYPE_SHIFT))
+
+/* Build a PVS source operand: each argument is masked and shifted into its
+ * field.  Arguments are parenthesized so that expressions can be passed. */
+#define PVS_SRC_OPERAND(in_reg_index, comp_x, comp_y, comp_z, comp_w, reg_class, negate)       \
+       ((((in_reg_index) & PVS_SRC_OFFSET_MASK) << PVS_SRC_OFFSET_SHIFT)                       \
+        | (((comp_x) & PVS_SRC_SWIZZLE_X_MASK) << PVS_SRC_SWIZZLE_X_SHIFT)                     \
+        | (((comp_y) & PVS_SRC_SWIZZLE_Y_MASK) << PVS_SRC_SWIZZLE_Y_SHIFT)                     \
+        | (((comp_z) & PVS_SRC_SWIZZLE_Z_MASK) << PVS_SRC_SWIZZLE_Z_SHIFT)                     \
+        | (((comp_w) & PVS_SRC_SWIZZLE_W_MASK) << PVS_SRC_SWIZZLE_W_SHIFT)                     \
+        | (((negate) & 0xf) << PVS_SRC_MODIFIER_X_SHIFT)       /* X Y Z W */                   \
+        | (((reg_class) & PVS_SRC_REG_TYPE_MASK) << PVS_SRC_REG_TYPE_SHIFT))
+
 /* BEGIN: Packet 3 commands */
 
 /* A primitive emission dword. */
@@ -3249,6 +3266,8 @@ enum {
 #   define R500_INST_RGB_CLAMP                         (1 << 19)
 #   define R500_INST_ALPHA_CLAMP                       (1 << 20)
 #   define R500_INST_ALU_RESULT_SEL                    (1 << 21)
+#   define R500_INST_ALU_RESULT_SEL_RED                        (0 << 21)
+#   define R500_INST_ALU_RESULT_SEL_ALPHA              (1 << 21)
 #   define R500_INST_ALPHA_PRED_INV                    (1 << 22)
 #   define R500_INST_ALU_RESULT_OP_EQ                  (0 << 23)
 #   define R500_INST_ALU_RESULT_OP_LT                  (1 << 23)
index 0561ab9bfa4526e5c25efb0e54867738eb40500c..07a3f3caee72e0f1449cc45a7752bb9f09f47922 100644 (file)
@@ -22,8 +22,7 @@
 
 #include "r300_tgsi_to_rc.h"
 
-#include "radeon_compiler.h"
-#include "radeon_program.h"
+#include "compiler/radeon_compiler.h"
 
 #include "tgsi/tgsi_info.h"
 #include "tgsi/tgsi_parse.h"
index b319890157fe970ea4a134612bf722c23922f635..a5e8fd680ff1a771b8b665c44bd0e94870cfc40a 100644 (file)
@@ -32,7 +32,7 @@
 #include "tgsi/tgsi_parse.h"
 #include "tgsi/tgsi_ureg.h"
 
-#include "radeon_compiler.h"
+#include "compiler/radeon_compiler.h"
 
 /* Convert info about VS output semantics into r300_shader_semantics. */
 static void r300_shader_read_vs_outputs(
index 170de6c79dbbbc0ddcbef18b92d3a3764746c9fd..a482ddce9c9c7209a75094afd9da19fa9e0adadb 100644 (file)
@@ -26,7 +26,7 @@
 
 #include "pipe/p_state.h"
 #include "tgsi/tgsi_scan.h"
-#include "radeon_code.h"
+#include "compiler/radeon_code.h"
 
 #include "r300_context.h"
 #include "r300_shader_semantics.h"