r300_fragprog: Refactor TEX transformation
author Nicolai Haehnle <nhaehnle@gmail.com>
Fri, 13 Jun 2008 23:46:19 +0000 (01:46 +0200)
committer Nicolai Haehnle <nhaehnle@gmail.com>
Sat, 14 Jun 2008 02:14:56 +0000 (04:14 +0200)
Streamlining source and destination registers, as well as texcoord scaling for
RECT textures is now done in a radeon_program based transformation.

The idea is that this will allow us to optimize away unnecessary indirections
more easily.

src/mesa/drivers/dri/r300/r300_fragprog.c
src/mesa/drivers/dri/r300/r300_fragprog.h
src/mesa/drivers/dri/r300/r300_fragprog_emit.c
src/mesa/drivers/dri/r300/radeon_program.c
src/mesa/drivers/dri/r300/radeon_program.h

index 94cb11afecb3aef2cb1a7bc298f740461de818b4..4c6289298e75149b403b3560187a4ce93d90e765 100644 (file)
 #include "r300_state.h"
 
 
+static void reset_srcreg(struct prog_src_register* reg) /* turn *reg into a blank source operand */
+{
+       _mesa_bzero(reg, sizeof(*reg)); /* zero every field of the register */
+       reg->Swizzle = SWIZZLE_NOOP; /* then set the swizzle to the no-op value */
+}
+
+/**
+ * Rewrite one TEX, TXB, TXP or KIL into context->dest and return GL_TRUE;
+ * any other opcode is left alone and GL_FALSE is returned. Emits, in order:
+ *  - for RECT targets (never KIL): a MAD scaling the coords by a state factor
+ *  - if the coord operand has a swizzle/abs/negate: a MAD copy into a temp
+ *  - the texture op; when its destination is not a full-mask temporary it
+ *    writes a fresh temp and a trailing MAD copies that to the original dest
+ * \todo If/when r5xx uses the radeon_program architecture, reuse this there.
+ */
+static GLboolean transform_TEX(
+       struct radeon_program_transform_context* context,
+       struct prog_instruction* orig_inst, void* data)
+{
+       struct r300_fragment_program_compiler *compiler =
+               (struct r300_fragment_program_compiler*)data; /* userData registered alongside this transformation */
+       struct prog_instruction inst = *orig_inst; /* working copy; orig_inst itself is only read */
+       struct prog_instruction* tgt;
+       GLboolean destredirect = GL_FALSE; /* set when the result must be copied back to the original destination */
+
+       if (inst.Opcode != OPCODE_TEX &&
+           inst.Opcode != OPCODE_TXB &&
+           inst.Opcode != OPCODE_TXP &&
+           inst.Opcode != OPCODE_KIL)
+               return GL_FALSE; /* not handled here; nothing has been emitted */
+
+       /* Hardware uses [0..1]x[0..1] range for rectangle textures
+        * instead of [0..Width]x[0..Height].
+        * Add a scaling instruction.
+        */
+       if (inst.Opcode != OPCODE_KIL && inst.TexSrcTarget == TEXTURE_RECT_INDEX) {
+               gl_state_index tokens[STATE_LENGTH] = {
+                       STATE_INTERNAL, STATE_R300_TEXRECT_FACTOR, 0, 0,
+                       0
+               };
+
+               int tempreg = radeonCompilerAllocateTemporary(context->compiler);
+               int factor_index;
+
+               tokens[2] = inst.TexSrcUnit; /* the state reference is keyed by the texture unit */
+               factor_index =
+                       _mesa_add_state_reference(
+                               compiler->fp->mesa_program.Base.Parameters, tokens);
+
+               tgt = radeonClauseInsertInstructions(context->compiler, context->dest,
+                       context->dest->NumInstructions, 1); /* position == NumInstructions: append */
+
+               tgt->Opcode = OPCODE_MAD; /* tempreg = coord * factor + 0 */
+               tgt->DstReg.File = PROGRAM_TEMPORARY;
+               tgt->DstReg.Index = tempreg;
+               tgt->SrcReg[0] = inst.SrcReg[0]; /* keeps the original swizzle and modifiers on the coord */
+               tgt->SrcReg[1].File = PROGRAM_STATE_VAR;
+               tgt->SrcReg[1].Index = factor_index;
+               tgt->SrcReg[2].File = PROGRAM_BUILTIN;
+               tgt->SrcReg[2].Swizzle = SWIZZLE_0000;
+
+               reset_srcreg(&inst.SrcReg[0]); /* the texture op now reads the plain temporary, so the check below will not fire */
+               inst.SrcReg[0].File = PROGRAM_TEMPORARY;
+               inst.SrcReg[0].Index = tempreg;
+       }
+
+       /* Texture operations do not support swizzles etc. in hardware,
+        * so emit an additional arithmetic operation if necessary.
+        */
+       if (inst.SrcReg[0].Swizzle != SWIZZLE_NOOP ||
+           inst.SrcReg[0].Abs || inst.SrcReg[0].NegateBase || inst.SrcReg[0].NegateAbs) {
+               int tempreg = radeonCompilerAllocateTemporary(context->compiler);
+
+               tgt = radeonClauseInsertInstructions(context->compiler, context->dest,
+                       context->dest->NumInstructions, 1);
+
+               tgt->Opcode = OPCODE_MAD; /* tempreg = coord * 1 + 0, i.e. a copy that applies the modifiers */
+               tgt->DstReg.File = PROGRAM_TEMPORARY;
+               tgt->DstReg.Index = tempreg;
+               tgt->SrcReg[0] = inst.SrcReg[0];
+               tgt->SrcReg[1].File = PROGRAM_BUILTIN;
+               tgt->SrcReg[1].Swizzle = SWIZZLE_1111;
+               tgt->SrcReg[2].File = PROGRAM_BUILTIN;
+               tgt->SrcReg[2].Swizzle = SWIZZLE_0000;
+
+               reset_srcreg(&inst.SrcReg[0]);
+               inst.SrcReg[0].File = PROGRAM_TEMPORARY;
+               inst.SrcReg[0].Index = tempreg;
+       }
+
+       if (inst.Opcode != OPCODE_KIL) {
+               if (inst.DstReg.File != PROGRAM_TEMPORARY ||
+                   inst.DstReg.WriteMask != WRITEMASK_XYZW) {
+                       int tempreg = radeonCompilerAllocateTemporary(context->compiler);
+
+                       inst.DstReg.File = PROGRAM_TEMPORARY; /* texture op writes a full-mask temporary instead */
+                       inst.DstReg.Index = tempreg;
+                       inst.DstReg.WriteMask = WRITEMASK_XYZW;
+                       destredirect = GL_TRUE;
+               }
+       }
+
+       tgt = radeonClauseInsertInstructions(context->compiler, context->dest,
+               context->dest->NumInstructions, 1);
+       _mesa_copy_instructions(tgt, &inst, 1); /* emit the (possibly rewritten) texture instruction */
+
+       if (destredirect) {
+               tgt = radeonClauseInsertInstructions(context->compiler, context->dest,
+                       context->dest->NumInstructions, 1);
+
+               tgt->Opcode = OPCODE_MAD; /* original dest = temp * 1 + 0 */
+               tgt->DstReg = orig_inst->DstReg; /* original destination, including its write mask */
+               tgt->SrcReg[0].File = PROGRAM_TEMPORARY;
+               tgt->SrcReg[0].Index = inst.DstReg.Index;
+               tgt->SrcReg[1].File = PROGRAM_BUILTIN;
+               tgt->SrcReg[1].Swizzle = SWIZZLE_1111;
+               tgt->SrcReg[2].File = PROGRAM_BUILTIN;
+               tgt->SrcReg[2].Swizzle = SWIZZLE_0000;
+       }
+
+       return GL_TRUE;
+}
+
+
 static void update_params(r300ContextPtr r300, struct r300_fragment_program *fp)
 {
        struct gl_fragment_program *mp = &fp->mesa_program;
@@ -170,6 +294,13 @@ void r300TranslateFragmentShader(r300ContextPtr r300,
 
                insert_WPOS_trailer(&compiler);
 
+               struct radeon_program_transformation transformations[1] = {
+                       { &transform_TEX, &compiler }
+               };
+               radeonClauseLocalTransform(&compiler.compiler,
+                       &compiler.compiler.Clauses[0],
+                       1, transformations);
+
                if (!r300FragmentProgramEmit(&compiler))
                        fp->error = GL_TRUE;
 
index 8c836c4bda67eb4de8f18fb884fda872f10e48d9..7c1e210b04422a67c6939b43cb92d07142a8a039 100644 (file)
@@ -149,6 +149,7 @@ struct r300_fragment_program_compiler {
        struct radeon_compiler compiler;
 };
 
+extern void r300FPTransformTextures(struct r300_fragment_program_compiler *compiler); /* NOTE(review): no definition of this function appears in this change; confirm one exists or drop the declaration */
 extern GLboolean r300FragmentProgramEmit(struct r300_fragment_program_compiler *compiler);
 
 
index fe8a347a625fcda2730ef6a3248fb6f70c3c762f..aec202a129b1b71982e4082c8b0db9fdae6f6fff 100644 (file)
@@ -527,32 +527,6 @@ static GLuint get_temp_reg(struct r300_pfs_compile_state *cs)
        return r;
 }
 
-/**
- * Create a new Mesa temporary register that will act as the destination
- * register for a texture read.
- */
-static GLuint get_temp_reg_tex(struct r300_pfs_compile_state *cs)
-{
-       COMPILE_STATE;
-       GLuint r = undef;
-       GLuint index;
-
-       index = ffs(~cs->temp_in_use);
-       if (!index) {
-               ERROR("Out of program temps\n");
-               return r;
-       }
-
-       cs->temp_in_use |= (1 << --index);
-       cs->temps[index].refcount = 0xFFFFFFFF;
-       cs->temps[index].reg = get_hw_temp_tex(cs);
-
-       REG_SET_TYPE(r, REG_TYPE_TEMP);
-       REG_SET_INDEX(r, index);
-       REG_SET_VALID(r, GL_TRUE);
-       return r;
-}
-
 /**
  * Free a Mesa temporary and the associated R300 temporary.
  */
@@ -847,6 +821,15 @@ static GLuint t_src(struct r300_pfs_compile_state *cs,
                                  fp->mesa_program.Base.Parameters->
                                  ParameterValues[fpsrc.Index]);
                break;
+       case PROGRAM_BUILTIN:
+               switch(fpsrc.Swizzle) {
+               case SWIZZLE_1111: r = pfs_one; break;
+               case SWIZZLE_0000: r = pfs_zero; break;
+               default:
+                       ERROR("bad PROGRAM_BUILTIN swizzle %u\n", fpsrc.Swizzle);
+                       break;
+               }
+               break;
        default:
                ERROR("unknown SrcReg->File %x\n", fpsrc.File);
                return r;
@@ -1003,56 +986,10 @@ static void emit_tex(struct r300_pfs_compile_state *cs,
 {
        COMPILE_STATE;
        GLuint coord = t_src(cs, fpi->SrcReg[0]);
-       GLuint dest = undef, rdest = undef;
+       GLuint dest = undef;
        GLuint din, uin;
        int unit = fpi->TexSrcUnit;
        int hwsrc, hwdest;
-       GLuint tempreg = 0;
-
-       /**
-        * Hardware uses [0..1]x[0..1] range for rectangle textures
-        * instead of [0..Width]x[0..Height].
-        * Add a scaling instruction.
-        *
-        * \todo Refactor this once we have proper rewriting/optimization
-        * support for programs.
-        */
-       if (opcode != R300_TEX_OP_KIL && fpi->TexSrcTarget == TEXTURE_RECT_INDEX) {
-               gl_state_index tokens[STATE_LENGTH] = {
-                       STATE_INTERNAL, STATE_R300_TEXRECT_FACTOR, 0, 0,
-                       0
-               };
-               int factor_index;
-               GLuint factorreg;
-
-               tokens[2] = unit;
-               factor_index =
-                       _mesa_add_state_reference(fp->mesa_program.Base.
-                                               Parameters, tokens);
-               factorreg =
-                       emit_const4fv(cs,
-                               fp->mesa_program.Base.Parameters->
-                               ParameterValues[factor_index]);
-               tempreg = keep(get_temp_reg(cs));
-
-               emit_arith(cs, PFS_OP_MAD, tempreg, WRITEMASK_XYZW,
-                       coord, factorreg, pfs_zero, 0);
-
-               coord = tempreg;
-       }
-
-       /* Texture operations do not support swizzles etc. in hardware,
-        * so emit an additional arithmetic operation if necessary.
-        */
-       if (REG_GET_VSWZ(coord) != SWIZZLE_XYZ ||
-           REG_GET_SSWZ(coord) != SWIZZLE_W ||
-           coord & (REG_NEGV_MASK | REG_NEGS_MASK | REG_ABS_MASK)) {
-               assert(tempreg == 0);
-               tempreg = keep(get_temp_reg(cs));
-               emit_arith(cs, PFS_OP_MAD, tempreg, WRITEMASK_XYZW,
-                       coord, pfs_one, pfs_zero, 0);
-               coord = tempreg;
-       }
 
        /* Ensure correct node indirection */
        uin = cs->used_in_node;
@@ -1064,15 +1001,6 @@ static void emit_tex(struct r300_pfs_compile_state *cs,
        if (opcode != R300_TEX_OP_KIL) {
                dest = t_dst(cs, fpi->DstReg);
 
-               /* r300 doesn't seem to be able to do TEX->output reg */
-               if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT) {
-                       rdest = dest;
-                       dest = get_temp_reg_tex(cs);
-               } else if (fpi->DstReg.WriteMask != WRITEMASK_XYZW) {
-                       /* in case write mask isn't XYZW */
-                       rdest = dest;
-                       dest = get_temp_reg_tex(cs);
-               }
                hwdest =
                    t_hw_dst(cs, dest, GL_TRUE,
                             code->node[code->cur_node].alu_offset);
@@ -1132,17 +1060,6 @@ static void emit_tex(struct r300_pfs_compile_state *cs,
                cs->used_in_node |= (1 << hwsrc);
 
        code->node[code->cur_node].tex_end++;
-
-       /* Copy from temp to output if needed */
-       if (REG_GET_VALID(rdest)) {
-               emit_arith(cs, PFS_OP_MAD, rdest, fpi->DstReg.WriteMask, dest,
-                          pfs_one, pfs_zero, 0);
-               free_temp(cs, dest);
-       }
-
-       /* Free temp register */
-       if (tempreg != 0)
-               free_temp(cs, tempreg);
 }
 
 /**
index 7b03fa65236fb33af15d97907ec75f1441c8c591..41cedbe61dc5cfe563b0e94c04122cd20ce44caf 100644 (file)
@@ -149,3 +149,101 @@ void radeonCompilerEraseClauses(
 
        _mesa_free(oldClauses);
 }
+
+
+/**
+ * Open a gap of `count` instructions at `position` in the clause (growing its
+ * array if needed), initialize the new instructions and return the first one.
+ */
+struct prog_instruction* radeonClauseInsertInstructions(
+       struct radeon_compiler *compiler, /* not referenced in the body */
+       struct radeon_clause *clause,
+       int position, int count)
+{
+       int newNumInstructions = clause->NumInstructions + count;
+
+       assert(position >= 0 && position <= clause->NumInstructions); /* position == NumInstructions means append */
+
+       if (newNumInstructions <= clause->ReservedInstructions) {
+               memmove(clause->Instructions + position + count, clause->Instructions + position,
+                       (clause->NumInstructions - position) * sizeof(struct prog_instruction)); /* enough capacity: shift the tail up in place */
+       } else {
+               struct prog_instruction *oldInstructions = clause->Instructions;
+
+               clause->ReservedInstructions *= 2; /* double the capacity ... */
+               if (newNumInstructions > clause->ReservedInstructions)
+                       clause->ReservedInstructions = newNumInstructions; /* ... or jump straight to the required size if that is larger */
+
+               clause->Instructions = (struct prog_instruction*)
+                       _mesa_malloc(clause->ReservedInstructions * sizeof(struct prog_instruction)); /* NOTE(review): result is used below without a NULL check */
+
+               if (oldInstructions) {
+                       _mesa_memcpy(clause->Instructions, oldInstructions,
+                               position * sizeof(struct prog_instruction)); /* head: everything before the gap */
+                       _mesa_memcpy(clause->Instructions + position + count, oldInstructions + position,
+                               (clause->NumInstructions - position) * sizeof(struct prog_instruction)); /* tail: everything after the gap */
+
+                       _mesa_free(oldInstructions); /* pointers returned by earlier calls into the old array are now dangling */
+               }
+       }
+
+       clause->NumInstructions = newNumInstructions;
+       _mesa_init_instructions(clause->Instructions + position, count);
+       return clause->Instructions + position;
+}
+
+
+/**
+ * Transform the given clause in the following way:
+ *  1. Replace it with an empty clause
+ *  2. For every instruction in the original clause, try the given
+ *     transformations in order, stopping at the first that returns GL_TRUE.
+ *  3. That transformation is assumed to have emitted the appropriate
+ *     instruction(s) into the new clause; if none returns GL_TRUE, the
+ *     instruction is copied verbatim.
+ *
+ * \note The transformation is currently not recursive; in other words,
+ * instructions emitted by transformations are not transformed.
+ *
+ * \note The transform is called 'local' because it can only look at
+ * one instruction at a time.
+ */
+void radeonClauseLocalTransform(
+       struct radeon_compiler *compiler,
+       struct radeon_clause *clause,
+       int num_transformations,
+       struct radeon_program_transformation* transformations)
+{
+       struct radeon_program_transform_context context;
+       struct radeon_clause source;
+       int ip;
+
+       source = *clause; /* source takes over the old instruction array */
+       clause->Instructions = 0; /* the clause itself restarts empty and is refilled below */
+       clause->NumInstructions = 0;
+       clause->ReservedInstructions = 0;
+
+       context.compiler = compiler;
+       context.dest = clause;
+       context.src = &source;
+
+       for(ip = 0; ip < source.NumInstructions; ++ip) {
+               struct prog_instruction *instr = source.Instructions + ip;
+               int i;
+
+               for(i = 0; i < num_transformations; ++i) {
+                       struct radeon_program_transformation* t = transformations + i;
+
+                       if (t->function(&context, instr, t->userData))
+                               break; /* first transformation that accepts the instruction wins */
+               }
+
+               if (i >= num_transformations) { /* loop ran to the end: nobody accepted the instruction */
+                       struct prog_instruction *tgt =
+                               radeonClauseInsertInstructions(compiler, clause, clause->NumInstructions, 1);
+                       _mesa_copy_instructions(tgt, instr, 1);
+               }
+       }
+
+       _mesa_free_instructions(source.Instructions, source.NumInstructions); /* release the original instruction array */
+}
index 18091ac02ad49863ee5b9a086ebc5cdfa4c33262..3cde4d4f6fc7f9b21f37844802dfd3d029242b59 100644 (file)
@@ -41,6 +41,13 @@ enum {
        CLAUSE_TEX
 };
 
+enum {
+       PROGRAM_BUILTIN = PROGRAM_FILE_MAX /**< not a real register file: the operand is a constant chosen by its swizzle (SWIZZLE_0000 or SWIZZLE_1111) */
+};
+
+#define SWIZZLE_0000 MAKE_SWIZZLE4(SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO) /* all four components select the constant zero */
+#define SWIZZLE_1111 MAKE_SWIZZLE4(SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE) /* all four components select the constant one */
+
 /**
  * A clause is simply a sequence of instructions that are executed
  * in order.
@@ -107,4 +114,50 @@ void radeonCompilerEraseClauses(
        int start,
        int end);
 
+struct prog_instruction* radeonClauseInsertInstructions(
+       struct radeon_compiler *compiler,
+       struct radeon_clause *clause,
+       int position, int count);
+
+/**
+ * State that radeonClauseLocalTransform hands to every transformation call.
+ */
+struct radeon_program_transform_context {
+       struct radeon_compiler *compiler;
+
+       /**
+        * Destination clause where new instructions must be written.
+        */
+       struct radeon_clause *dest;
+
+       /**
+        * Original clause that is currently being transformed.
+        */
+       struct radeon_clause *src;
+};
+
+/**
+ * A transformation that can be passed to \ref radeonClauseLocalTransform.
+ *
+ * The function will be called once for each instruction.
+ * It has to either emit the appropriate transformed code for the instruction
+ * and return GL_TRUE, or return GL_FALSE if it doesn't understand the
+ * instruction.
+ *
+ * The function gets passed the userData as last parameter.
+ */
+struct radeon_program_transformation {
+       GLboolean (*function)(
+               struct radeon_program_transform_context*,
+               struct prog_instruction*,
+               void*);
+       void *userData;
+};
+
+void radeonClauseLocalTransform(
+       struct radeon_compiler *compiler,
+       struct radeon_clause *clause,
+       int num_transformations,
+       struct radeon_program_transformation* transformations);
+
 #endif