r300: add hw accelerated support for different vertex data formats
[mesa.git] / src / mesa / drivers / dri / r300 / r500_fragprog.c
index fd9769d653db0d7e65c5ddf82c8996a7ef747c38..8f0b70ad3a3eeab52f547b4657d60b85884d9c14 100644 (file)
 
 #include "r500_fragprog.h"
 
+static void reset_srcreg(struct prog_src_register* reg) /*
+ * Zero every byte of *reg, then set its Swizzle field to SWIZZLE_NOOP.
+ * All other fields are left at zero for the caller to fill in.
+ */
+{
+       _mesa_bzero(reg, sizeof(*reg));
+       reg->Swizzle = SWIZZLE_NOOP; /* the only field that does not stay zero */
+}
+
+static struct prog_src_register shadow_ambient(struct gl_program *program, int tmu) /*
+ * Return a source operand that reads the STATE_INTERNAL /
+ * STATE_SHADOW_AMBIENT state variable for texture unit tmu.
+ *
+ * The state reference is added to program->Parameters, and the value
+ * returned by _mesa_add_state_reference() becomes the operand's Index.
+ * The operand is returned by value.
+ */
+{
+       gl_state_index fail_value_tokens[STATE_LENGTH] = {
+               STATE_INTERNAL, STATE_SHADOW_AMBIENT, 0, 0, 0
+       };
+       struct prog_src_register reg = { 0, };
+
+       fail_value_tokens[2] = tmu; /* the third token carries the texture unit */
+       reg.File = PROGRAM_STATE_VAR;
+       reg.Index = _mesa_add_state_reference(program->Parameters, fail_value_tokens);
+       reg.Swizzle = SWIZZLE_WWWW; /* every channel reads the .w component */
+       return reg;
+}
 
 /**
  * Transform TEX, TXP, TXB, and KIL instructions in the following way:
  *  - introduce a temporary register when write masks are needed
  *
  */
-static GLboolean transform_TEX(
-       struct radeon_program_transform_context* context,
+GLboolean r500_transform_TEX(
+       struct radeon_transform_context *t,
        struct prog_instruction* orig_inst, void* data)
 {
-       struct r500_fragment_program_compiler *compiler =
-               (struct r500_fragment_program_compiler*)data;
+       struct r300_fragment_program_compiler *compiler =
+               (struct r300_fragment_program_compiler*)data;
        struct prog_instruction inst = *orig_inst;
        struct prog_instruction* tgt;
        GLboolean destredirect = GL_FALSE;
@@ -53,39 +72,59 @@ static GLboolean transform_TEX(
 
        /* ARB_shadow & EXT_shadow_funcs */
        if (inst.Opcode != OPCODE_KIL &&
-           compiler->fp->mesa_program.Base.ShadowSamplers & (1 << inst.TexSrcUnit)) {
+           t->Program->ShadowSamplers & (1 << inst.TexSrcUnit)) {
                GLuint comparefunc = GL_NEVER + compiler->fp->state.unit[inst.TexSrcUnit].texture_compare_func;
 
                if (comparefunc == GL_NEVER || comparefunc == GL_ALWAYS) {
-                       tgt = radeonClauseInsertInstructions(context->compiler, context->dest,
-                               context->dest->NumInstructions, 1);
+                       tgt = radeonAppendInstructions(t->Program, 1);
 
                        tgt->Opcode = OPCODE_MOV;
-                       tgt->DstReg.File = inst.DstReg.File;
-                       tgt->DstReg.Index = inst.DstReg.Index;
-                       tgt->DstReg.WriteMask = inst.DstReg.WriteMask;
-                       tgt->SrcReg[0].File = PROGRAM_BUILTIN;
-                       tgt->SrcReg[0].Swizzle = comparefunc == GL_ALWAYS ? SWIZZLE_1111 : SWIZZLE_0000;
+                       tgt->DstReg = inst.DstReg;
+                       if (comparefunc == GL_ALWAYS) {
+                               tgt->SrcReg[0].File = PROGRAM_BUILTIN;
+                               tgt->SrcReg[0].Swizzle = SWIZZLE_1111;
+                       } else {
+                               tgt->SrcReg[0] = shadow_ambient(t->Program, inst.TexSrcUnit);
+                       }
                        return GL_TRUE;
                }
 
                inst.DstReg.File = PROGRAM_TEMPORARY;
-               inst.DstReg.Index = radeonCompilerAllocateTemporary(context->compiler);
+               inst.DstReg.Index = radeonFindFreeTemporary(t);
                inst.DstReg.WriteMask = WRITEMASK_XYZW;
+       } else if (inst.Opcode != OPCODE_KIL && inst.DstReg.File != PROGRAM_TEMPORARY) {
+               int tempreg = radeonFindFreeTemporary(t);
+
+               inst.DstReg.File = PROGRAM_TEMPORARY;
+               inst.DstReg.Index = tempreg;
+               inst.DstReg.WriteMask = WRITEMASK_XYZW;
+               destredirect = GL_TRUE;
+       }
+
+       if (inst.SrcReg[0].File != PROGRAM_TEMPORARY && inst.SrcReg[0].File != PROGRAM_INPUT) {
+               int tmpreg = radeonFindFreeTemporary(t);
+               tgt = radeonAppendInstructions(t->Program, 1);
+               tgt->Opcode = OPCODE_MOV;
+               tgt->DstReg.File = PROGRAM_TEMPORARY;
+               tgt->DstReg.Index = tmpreg;
+               tgt->SrcReg[0] = inst.SrcReg[0];
+
+               reset_srcreg(&inst.SrcReg[0]);
+               inst.SrcReg[0].File = PROGRAM_TEMPORARY;
+               inst.SrcReg[0].Index = tmpreg;
        }
 
-       tgt = radeonClauseInsertInstructions(context->compiler, context->dest,
-               context->dest->NumInstructions, 1);
+       tgt = radeonAppendInstructions(t->Program, 1);
        _mesa_copy_instructions(tgt, &inst, 1);
 
        if (inst.Opcode != OPCODE_KIL &&
-           compiler->fp->mesa_program.Base.ShadowSamplers & (1 << inst.TexSrcUnit)) {
+           t->Program->ShadowSamplers & (1 << inst.TexSrcUnit)) {
                GLuint comparefunc = GL_NEVER + compiler->fp->state.unit[inst.TexSrcUnit].texture_compare_func;
                GLuint depthmode = compiler->fp->state.unit[inst.TexSrcUnit].depth_texture_mode;
-               int rcptemp = radeonCompilerAllocateTemporary(context->compiler);
+               int rcptemp = radeonFindFreeTemporary(t);
+               int pass, fail;
 
-               tgt = radeonClauseInsertInstructions(context->compiler, context->dest,
-                       context->dest->NumInstructions, 3);
+               tgt = radeonAppendInstructions(t->Program, 3);
 
                tgt[0].Opcode = OPCODE_RCP;
                tgt[0].DstReg.File = PROGRAM_TEMPORARY;
@@ -113,245 +152,133 @@ static GLboolean transform_TEX(
                 *   r  < tex  <=>      -tex+r < 0
                 *   r >= tex  <=> not (-tex+r < 0 */
                if (comparefunc == GL_LESS || comparefunc == GL_GEQUAL)
-                       tgt[1].SrcReg[2].NegateBase = tgt[0].SrcReg[2].NegateBase ^ NEGATE_XYZW;
+                       tgt[1].SrcReg[2].Negate = tgt[0].SrcReg[2].Negate ^ NEGATE_XYZW;
                else
-                       tgt[1].SrcReg[0].NegateBase = tgt[0].SrcReg[0].NegateBase ^ NEGATE_XYZW;
+                       tgt[1].SrcReg[0].Negate = tgt[0].SrcReg[0].Negate ^ NEGATE_XYZW;
 
                tgt[2].Opcode = OPCODE_CMP;
                tgt[2].DstReg = orig_inst->DstReg;
                tgt[2].SrcReg[0].File = PROGRAM_TEMPORARY;
                tgt[2].SrcReg[0].Index = tgt[1].DstReg.Index;
-               tgt[2].SrcReg[1].File = PROGRAM_BUILTIN;
-               tgt[2].SrcReg[2].File = PROGRAM_BUILTIN;
 
                if (comparefunc == GL_LESS || comparefunc == GL_GREATER) {
-                       tgt[2].SrcReg[1].Swizzle = SWIZZLE_1111;
-                       tgt[2].SrcReg[2].Swizzle = SWIZZLE_0000;
+                       pass = 1;
+                       fail = 2;
                } else {
-                       tgt[2].SrcReg[1].Swizzle = SWIZZLE_0000;
-                       tgt[2].SrcReg[2].Swizzle = SWIZZLE_1111;
+                       pass = 2;
+                       fail = 1;
                }
+
+               tgt[2].SrcReg[pass].File = PROGRAM_BUILTIN;
+               tgt[2].SrcReg[pass].Swizzle = SWIZZLE_1111;
+               tgt[2].SrcReg[fail] = shadow_ambient(t->Program, inst.TexSrcUnit);
        } else if (destredirect) {
-               tgt = radeonClauseInsertInstructions(context->compiler, context->dest,
-                       context->dest->NumInstructions, 1);
+               tgt = radeonAppendInstructions(t->Program, 1);
 
-               tgt->Opcode = OPCODE_MAD;
+               tgt->Opcode = OPCODE_MOV;
                tgt->DstReg = orig_inst->DstReg;
                tgt->SrcReg[0].File = PROGRAM_TEMPORARY;
                tgt->SrcReg[0].Index = inst.DstReg.Index;
-               tgt->SrcReg[1].File = PROGRAM_BUILTIN;
-               tgt->SrcReg[1].Swizzle = SWIZZLE_1111;
-               tgt->SrcReg[2].File = PROGRAM_BUILTIN;
-               tgt->SrcReg[2].Swizzle = SWIZZLE_0000;
        }
 
        return GL_TRUE;
 }
 
-
-static void update_params(r300ContextPtr r300, struct r500_fragment_program *fp)
+GLboolean r500FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg) /*
+ * Report whether source operand reg -- its Swizzle, Abs and Negate fields --
+ * is accepted unchanged for an instruction with the given opcode.
+ * Returns GL_TRUE when it is, GL_FALSE when it is not.
+ *
+ * reg is a by-value copy, so the edits made to reg.Negate below are
+ * scratch work and are not seen by the caller.
+ */
 {
-       struct gl_fragment_program *mp = &fp->mesa_program;
-
-       /* Ask Mesa nicely to fill in ParameterValues for us */
-       if (mp->Base.Parameters)
-               _mesa_load_state_parameters(r300->radeon.glCtx, mp->Base.Parameters);
-}
+       GLuint relevant;
+       int i;
+       /* Texture fetches (TEX/TXB/TXP) and KIL share one rule set. */
+       if (opcode == OPCODE_TEX ||
+           opcode == OPCODE_TXB ||
+           opcode == OPCODE_TXP ||
+           opcode == OPCODE_KIL) {
+               if (reg.Abs) /* absolute value is never accepted here */
+                       return GL_FALSE;
+               /* Flip a non-empty negate mask: the bits that survive now mark the
+                * components that were NOT negated (assumes NEGATE_XYZW sets all
+                * four component bits -- TODO confirm). */
+               if (reg.Negate)
+                       reg.Negate ^= NEGATE_XYZW;
+               /* KIL takes only SWIZZLE_NOOP; the others are checked per component. */
+               if (opcode == OPCODE_KIL) {
+                       if (reg.Swizzle != SWIZZLE_NOOP)
+                               return GL_FALSE;
+               } else {
+                       for(i = 0; i < 4; ++i) {
+                               GLuint swz = GET_SWZ(reg.Swizzle, i);
+                               if (swz == SWIZZLE_NIL) { /* NIL component: drop its negate bit and skip it */
+                                       reg.Negate &= ~(1 << i);
+                                       continue;
+                               }
+                               if (swz >= 4) /* selector values of 4 and up are rejected (presumably the constant selectors -- confirm) */
+                                       return GL_FALSE;
+                       }
+               }
 
+               if (reg.Negate) /* a surviving bit means the negation was only partial */
+                       return GL_FALSE;
 
-/**
- * Transform the program to support fragment.position.
- *
- * Introduce a small fragment at the start of the program that will be
- * the only code that directly reads the FRAG_ATTRIB_WPOS input.
- * All other code pieces that reference that input will be rewritten
- * to read from a newly allocated temporary.
- *
- * \todo if/when r5xx supports the radeon_program architecture, this is a
- * likely candidate for code sharing.
- */
-static void insert_WPOS_trailer(struct r500_fragment_program_compiler *compiler)
-{
-       GLuint InputsRead = compiler->fp->mesa_program.Base.InputsRead;
+               return GL_TRUE;
+       } else if (opcode == OPCODE_DDX || opcode == OPCODE_DDY) {
+               /* DDX/MDH and DDY/MDV explicitly ignore incoming swizzles, so
+                * anything that doesn't fit perfectly into the plain .xyzw case
+                * (SWIZZLE_NOOP, no Abs, no Negate) is rejected. */
+               if (reg.Swizzle == SWIZZLE_NOOP && !reg.Abs && !reg.Negate)
+                       return GL_TRUE;
 
-       if (!(InputsRead & FRAG_BIT_WPOS))
-               return;
+               return GL_FALSE;
+       } else {
+               /* ALU instructions support almost everything; the only rejection
+                * below is a negate that covers some, but not all, of the
+                * relevant x/y/z components. */
+               if (reg.Abs) /* with Abs set the operand is accepted without looking at Negate */
+                       return GL_TRUE;
 
-       static gl_state_index tokens[STATE_LENGTH] = {
-               STATE_INTERNAL, STATE_R300_WINDOW_DIMENSION, 0, 0, 0
-       };
-       struct prog_instruction *fpi;
-       GLuint window_index;
-       int i = 0;
-       GLuint tempregi = radeonCompilerAllocateTemporary(&compiler->compiler);
-
-       fpi = radeonClauseInsertInstructions(&compiler->compiler, &compiler->compiler.Clauses[0], 0, 3);
-
-       /* perspective divide */
-       fpi[i].Opcode = OPCODE_RCP;
-
-       fpi[i].DstReg.File = PROGRAM_TEMPORARY;
-       fpi[i].DstReg.Index = tempregi;
-       fpi[i].DstReg.WriteMask = WRITEMASK_W;
-       fpi[i].DstReg.CondMask = COND_TR;
-
-       fpi[i].SrcReg[0].File = PROGRAM_INPUT;
-       fpi[i].SrcReg[0].Index = FRAG_ATTRIB_WPOS;
-       fpi[i].SrcReg[0].Swizzle = SWIZZLE_WWWW;
-       i++;
-
-       fpi[i].Opcode = OPCODE_MUL;
-
-       fpi[i].DstReg.File = PROGRAM_TEMPORARY;
-       fpi[i].DstReg.Index = tempregi;
-       fpi[i].DstReg.WriteMask = WRITEMASK_XYZ;
-       fpi[i].DstReg.CondMask = COND_TR;
-
-       fpi[i].SrcReg[0].File = PROGRAM_INPUT;
-       fpi[i].SrcReg[0].Index = FRAG_ATTRIB_WPOS;
-       fpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW;
-
-       fpi[i].SrcReg[1].File = PROGRAM_TEMPORARY;
-       fpi[i].SrcReg[1].Index = tempregi;
-       fpi[i].SrcReg[1].Swizzle = SWIZZLE_WWWW;
-       i++;
-
-       /* viewport transformation */
-       window_index = _mesa_add_state_reference(compiler->fp->mesa_program.Base.Parameters, tokens);
-
-       fpi[i].Opcode = OPCODE_MAD;
-
-       fpi[i].DstReg.File = PROGRAM_TEMPORARY;
-       fpi[i].DstReg.Index = tempregi;
-       fpi[i].DstReg.WriteMask = WRITEMASK_XYZ;
-       fpi[i].DstReg.CondMask = COND_TR;
-
-       fpi[i].SrcReg[0].File = PROGRAM_TEMPORARY;
-       fpi[i].SrcReg[0].Index = tempregi;
-       fpi[i].SrcReg[0].Swizzle =
-           MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO);
-
-       fpi[i].SrcReg[1].File = PROGRAM_STATE_VAR;
-       fpi[i].SrcReg[1].Index = window_index;
-       fpi[i].SrcReg[1].Swizzle =
-           MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO);
-
-       fpi[i].SrcReg[2].File = PROGRAM_STATE_VAR;
-       fpi[i].SrcReg[2].Index = window_index;
-       fpi[i].SrcReg[2].Swizzle =
-           MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO);
-       i++;
-
-       for (; i < compiler->compiler.Clauses[0].NumInstructions; ++i) {
-               int reg;
-               for (reg = 0; reg < 3; reg++) {
-                       if (fpi[i].SrcReg[reg].File == PROGRAM_INPUT &&
-                           fpi[i].SrcReg[reg].Index == FRAG_ATTRIB_WPOS) {
-                               fpi[i].SrcReg[reg].File = PROGRAM_TEMPORARY;
-                               fpi[i].SrcReg[reg].Index = tempregi;
-                       }
+               relevant = 0; /* mask of x/y/z components (w is not looked at) that select neither NIL nor ZERO */
+               for(i = 0; i < 3; ++i) {
+                       GLuint swz = GET_SWZ(reg.Swizzle, i);
+                       if (swz != SWIZZLE_NIL && swz != SWIZZLE_ZERO)
+                               relevant |= 1 << i;
                 }
-       }
-}
+               if ((reg.Negate & relevant) && ((reg.Negate & relevant) != relevant)) /* negated on some relevant components but not all */
+                       return GL_FALSE;
 
-
-static GLuint build_dtm(GLuint depthmode)
-{
-       switch(depthmode) {
-       default:
-       case GL_LUMINANCE: return 0;
-       case GL_INTENSITY: return 1;
-       case GL_ALPHA: return 2;
+               return GL_TRUE;
        }
 }
 
-static GLuint build_func(GLuint comparefunc)
-{
-       return comparefunc - GL_NEVER;
-}
-
-
 /**
- * Collect all external state that is relevant for compiling the given
- * fragment program.
+ * Implement a MOV with a potentially non-native swizzle.
+ *
+ * The only thing we *cannot* do in an ALU instruction is per-component
+ * negation. Therefore, we split the MOV into two instructions when necessary.
+ *
+ * Zero, one or two MOVs to \p dst are inserted at s->IP: one covering the
+ * components of \p src whose negate bit is clear and one covering those
+ * whose negate bit is set. Components whose swizzle is SWIZZLE_NIL are not
+ * written at all. s->IP is advanced past every MOV that is written.
  */
-static void build_state(
-       r300ContextPtr r300,
-       struct r500_fragment_program *fp,
-       struct r500_fragment_program_external_state *state)
+void r500FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst, struct prog_src_register src)
 {
-       int unit;
-
-       _mesa_bzero(state, sizeof(*state));
-
-       for(unit = 0; unit < 16; ++unit) {
-               if (fp->mesa_program.Base.ShadowSamplers & (1 << unit)) {
-                       struct gl_texture_object* tex = r300->radeon.glCtx->Texture.Unit[unit]._Current;
-
-                       state->unit[unit].depth_texture_mode = build_dtm(tex->DepthMode);
-                       state->unit[unit].texture_compare_func = build_func(tex->CompareFunc);
-               }
+       struct prog_instruction *inst;
+       GLuint negatebase[2] = { 0, 0 }; /* write masks: [0] = negate bit clear, [1] = negate bit set */
+       int i;
+       /* Sort every non-NIL component into one of the two masks. */
+       for(i = 0; i < 4; ++i) {
+               GLuint swz = GET_SWZ(src.Swizzle, i);
+               if (swz == SWIZZLE_NIL)
+                       continue;
+               negatebase[GET_BIT(src.Negate, i)] |= 1 << i;
        }
-}
 
-static void dump_program(struct r500_fragment_program_code *code);
+       _mesa_insert_instructions(s->Program, s->IP, (negatebase[0] ? 1 : 0) + (negatebase[1] ? 1 : 0)); /* one slot per non-empty mask */
+       inst = s->Program->Instructions + s->IP;
 
-void r500TranslateFragmentShader(r300ContextPtr r300,
-                                struct r500_fragment_program *fp)
-{
-       struct r500_fragment_program_external_state state;
+       for(i = 0; i <= 1; ++i) {
+               if (!negatebase[i]) /* empty group: no MOV was reserved for it */
+                       continue;
 
-       build_state(r300, fp, &state);
-       if (_mesa_memcmp(&fp->state, &state, sizeof(state))) {
-               /* TODO: cache compiled programs */
-               fp->translated = GL_FALSE;
-               _mesa_memcpy(&fp->state, &state, sizeof(state));
+               inst->Opcode = OPCODE_MOV;
+               inst->DstReg = dst;
+               inst->DstReg.WriteMask = negatebase[i]; /* this MOV writes only its own group of components */
+               inst->SrcReg[0] = src; /* NOTE(review): src.Negate is copied as-is and can
+                                       * still hold the other group's bits -- confirm
+                                       * that a later pass narrows it. */
+               inst++;
+               s->IP++;
        }
-
-       if (!fp->translated) {
-               struct r500_fragment_program_compiler compiler;
-
-               compiler.r300 = r300;
-               compiler.fp = fp;
-               compiler.code = &fp->code;
-
-               radeonCompilerInit(&compiler.compiler, r300->radeon.glCtx, &fp->mesa_program.Base);
-
-               insert_WPOS_trailer(&compiler);
-
-               struct radeon_program_transformation transformations[1] = {
-                       { &transform_TEX, &compiler }
-               };
-               radeonClauseLocalTransform(&compiler.compiler,
-                       &compiler.compiler.Clauses[0],
-                       1, transformations);
-
-               if (RADEON_DEBUG & DEBUG_PIXEL) {
-                       _mesa_printf("Compiler state after transformations:\n");
-                       radeonCompilerDump(&compiler.compiler);
-               }
-
-               fp->translated = r500FragmentProgramEmit(&compiler);
-
-               radeonCompilerCleanup(&compiler.compiler);
-
-               r300UpdateStateParameters(r300->radeon.glCtx, _NEW_PROGRAM);
-
-               if (RADEON_DEBUG & DEBUG_PIXEL) {
-                       fprintf(stderr, "Mesa program:\n");
-                       fprintf(stderr, "-------------\n");
-                       _mesa_print_program(&fp->mesa_program.Base);
-                       fflush(stdout);
-                       if (fp->translated)
-                               dump_program(&fp->code);
-               }
-
-       }
-
-       update_params(r300, fp);
-
 }
 
+
 static char *toswiz(int swiz_val) {
   switch(swiz_val) {
   case 0: return "R";
@@ -449,9 +376,9 @@ static char *to_texop(int val)
   return NULL;
 }
 
-static void dump_program(struct r500_fragment_program_code *code)
+void r500FragmentProgramDump(union rX00_fragment_program_code *c)
 {
-
+  struct r500_fragment_program_code *code = &c->r500;
   fprintf(stderr, "R500 Fragment Program:\n--------\n");
 
   int n;
@@ -462,9 +389,8 @@ static void dump_program(struct r500_fragment_program_code *code)
   if (code->const_nr) {
     fprintf(stderr, "--------\nConstants:\n");
     for (n = 0; n < code->const_nr; n++) {
-      fprintf(stderr, "Constant %d: %f %f\n\t %f %f\n", n,
-        code->constant[n][0], code->constant[n][1], code->constant[n][2],
-        code->constant[n][3]);
+      fprintf(stderr, "Constant %d: %i[%i]\n", n,
+        code->constant[n].File, code->constant[n].Index);
     }
     fprintf(stderr, "--------\n");
   }