r300: add hw accelerated support for different vertex data formats
[mesa.git] / src / mesa / drivers / dri / r300 / r500_fragprog.c
index 526a0ea928fa82dfc8396c8828d3a9ab485dd3ef..8f0b70ad3a3eeab52f547b4657d60b85884d9c14 100644 (file)
 
 #include "r500_fragprog.h"
 
-#include "radeon_nqssadce.h"
-#include "radeon_program_alu.h"
-
-
 static void reset_srcreg(struct prog_src_register* reg)
 {
        _mesa_bzero(reg, sizeof(*reg));
@@ -58,7 +54,7 @@ static struct prog_src_register shadow_ambient(struct gl_program *program, int t
  *  - introduce a temporary register when write masks are needed
  *
  */
-static GLboolean transform_TEX(
+GLboolean r500_transform_TEX(
        struct radeon_transform_context *t,
        struct prog_instruction* orig_inst, void* data)
 {
@@ -156,9 +152,9 @@ static GLboolean transform_TEX(
                 *   r  < tex  <=>      -tex+r < 0
                 *   r >= tex  <=> not (-tex+r < 0 */
                if (comparefunc == GL_LESS || comparefunc == GL_GEQUAL)
-                       tgt[1].SrcReg[2].NegateBase = tgt[0].SrcReg[2].NegateBase ^ NEGATE_XYZW;
+                       tgt[1].SrcReg[2].Negate = tgt[0].SrcReg[2].Negate ^ NEGATE_XYZW;
                else
-                       tgt[1].SrcReg[0].NegateBase = tgt[0].SrcReg[0].NegateBase ^ NEGATE_XYZW;
+                       tgt[1].SrcReg[0].Negate = tgt[0].SrcReg[0].Negate ^ NEGATE_XYZW;
 
                tgt[2].Opcode = OPCODE_CMP;
                tgt[2].DstReg = orig_inst->DstReg;
@@ -188,119 +184,7 @@ static GLboolean transform_TEX(
        return GL_TRUE;
 }
 
-
-static void update_params(GLcontext *ctx, struct gl_fragment_program *fp)
-{
-       /* Ask Mesa nicely to fill in ParameterValues for us */
-       if (fp->Base.Parameters)
-               _mesa_load_state_parameters(ctx, fp->Base.Parameters);
-}
-
-
-/**
- * Transform the program to support fragment.position.
- *
- * Introduce a small fragment at the start of the program that will be
- * the only code that directly reads the FRAG_ATTRIB_WPOS input.
- * All other code pieces that reference that input will be rewritten
- * to read from a newly allocated temporary.
- *
- * \todo if/when r5xx supports the radeon_program architecture, this is a
- * likely candidate for code sharing.
- */
-static void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler)
-{
-       GLuint InputsRead = compiler->fp->Base.Base.InputsRead;
-
-       if (!(InputsRead & FRAG_BIT_WPOS))
-               return;
-
-       static gl_state_index tokens[STATE_LENGTH] = {
-               STATE_INTERNAL, STATE_R300_WINDOW_DIMENSION, 0, 0, 0
-       };
-       struct prog_instruction *fpi;
-       GLuint window_index;
-       int i = 0;
-       GLuint tempregi = _mesa_find_free_register(compiler->program, PROGRAM_TEMPORARY);
-
-       _mesa_insert_instructions(compiler->program, 0, 3);
-       fpi = compiler->program->Instructions;
-
-       /* perspective divide */
-       fpi[i].Opcode = OPCODE_RCP;
-
-       fpi[i].DstReg.File = PROGRAM_TEMPORARY;
-       fpi[i].DstReg.Index = tempregi;
-       fpi[i].DstReg.WriteMask = WRITEMASK_W;
-       fpi[i].DstReg.CondMask = COND_TR;
-
-       fpi[i].SrcReg[0].File = PROGRAM_INPUT;
-       fpi[i].SrcReg[0].Index = FRAG_ATTRIB_WPOS;
-       fpi[i].SrcReg[0].Swizzle = SWIZZLE_WWWW;
-       i++;
-
-       fpi[i].Opcode = OPCODE_MUL;
-
-       fpi[i].DstReg.File = PROGRAM_TEMPORARY;
-       fpi[i].DstReg.Index = tempregi;
-       fpi[i].DstReg.WriteMask = WRITEMASK_XYZ;
-       fpi[i].DstReg.CondMask = COND_TR;
-
-       fpi[i].SrcReg[0].File = PROGRAM_INPUT;
-       fpi[i].SrcReg[0].Index = FRAG_ATTRIB_WPOS;
-       fpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW;
-
-       fpi[i].SrcReg[1].File = PROGRAM_TEMPORARY;
-       fpi[i].SrcReg[1].Index = tempregi;
-       fpi[i].SrcReg[1].Swizzle = SWIZZLE_WWWW;
-       i++;
-
-       /* viewport transformation */
-       window_index = _mesa_add_state_reference(compiler->program->Parameters, tokens);
-
-       fpi[i].Opcode = OPCODE_MAD;
-
-       fpi[i].DstReg.File = PROGRAM_TEMPORARY;
-       fpi[i].DstReg.Index = tempregi;
-       fpi[i].DstReg.WriteMask = WRITEMASK_XYZ;
-       fpi[i].DstReg.CondMask = COND_TR;
-
-       fpi[i].SrcReg[0].File = PROGRAM_TEMPORARY;
-       fpi[i].SrcReg[0].Index = tempregi;
-       fpi[i].SrcReg[0].Swizzle =
-           MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO);
-
-       fpi[i].SrcReg[1].File = PROGRAM_STATE_VAR;
-       fpi[i].SrcReg[1].Index = window_index;
-       fpi[i].SrcReg[1].Swizzle =
-           MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO);
-
-       fpi[i].SrcReg[2].File = PROGRAM_STATE_VAR;
-       fpi[i].SrcReg[2].Index = window_index;
-       fpi[i].SrcReg[2].Swizzle =
-           MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO);
-       i++;
-
-       for (; i < compiler->program->NumInstructions; ++i) {
-               int reg;
-               for (reg = 0; reg < 3; reg++) {
-                       if (fpi[i].SrcReg[reg].File == PROGRAM_INPUT &&
-                           fpi[i].SrcReg[reg].Index == FRAG_ATTRIB_WPOS) {
-                               fpi[i].SrcReg[reg].File = PROGRAM_TEMPORARY;
-                               fpi[i].SrcReg[reg].Index = tempregi;
-                       }
-               }
-       }
-}
-
-
-static void nqssadce_init(struct nqssadce_state* s)
-{
-       s->Outputs[FRAG_RESULT_COLOR].Sourced = WRITEMASK_XYZW;
-       s->Outputs[FRAG_RESULT_DEPTH].Sourced = WRITEMASK_W;
-}
-
-static GLboolean is_native_swizzle(GLuint opcode, struct prog_src_register reg)
+GLboolean r500FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg)
 {
        GLuint relevant;
        int i;
@@ -312,8 +196,8 @@ static GLboolean is_native_swizzle(GLuint opcode, struct prog_src_register reg)
                if (reg.Abs)
                        return GL_FALSE;
 
-               if (reg.NegateAbs)
-                       reg.NegateBase ^= 15;
+               if (reg.Negate)
+                       reg.Negate ^= NEGATE_XYZW;
 
                if (opcode == OPCODE_KIL) {
                        if (reg.Swizzle != SWIZZLE_NOOP)
@@ -322,7 +206,7 @@ static GLboolean is_native_swizzle(GLuint opcode, struct prog_src_register reg)
                        for(i = 0; i < 4; ++i) {
                                GLuint swz = GET_SWZ(reg.Swizzle, i);
                                if (swz == SWIZZLE_NIL) {
-                                       reg.NegateBase &= ~(1 << i);
+                                       reg.Negate &= ~(1 << i);
                                        continue;
                                }
                                if (swz >= 4)
@@ -330,15 +214,14 @@ static GLboolean is_native_swizzle(GLuint opcode, struct prog_src_register reg)
                        }
                }
 
-               if (reg.NegateBase)
+               if (reg.Negate)
                        return GL_FALSE;
 
                return GL_TRUE;
        } else if (opcode == OPCODE_DDX || opcode == OPCODE_DDY) {
                /* DDX/MDH and DDY/MDV explicitly ignore incoming swizzles;
                 * if it doesn't fit perfectly into a .xyzw case... */
-               if (reg.Swizzle == SWIZZLE_NOOP && !reg.Abs
-                               && !reg.NegateBase && !reg.NegateAbs)
+               if (reg.Swizzle == SWIZZLE_NOOP && !reg.Abs && !reg.Negate)
                        return GL_TRUE;
 
                return GL_FALSE;
@@ -353,7 +236,7 @@ static GLboolean is_native_swizzle(GLuint opcode, struct prog_src_register reg)
                        if (swz != SWIZZLE_NIL && swz != SWIZZLE_ZERO)
                                relevant |= 1 << i;
                }
-               if ((reg.NegateBase & relevant) && ((reg.NegateBase & relevant) != relevant))
+               if ((reg.Negate & relevant) && ((reg.Negate & relevant) != relevant))
                        return GL_FALSE;
 
                return GL_TRUE;
@@ -366,8 +249,7 @@ static GLboolean is_native_swizzle(GLuint opcode, struct prog_src_register reg)
  * The only thing we *cannot* do in an ALU instruction is per-component
  * negation. Therefore, we split the MOV into two instructions when necessary.
  */
-static void nqssadce_build_swizzle(struct nqssadce_state *s,
-       struct prog_dst_register dst, struct prog_src_register src)
+void r500FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst, struct prog_src_register src)
 {
        struct prog_instruction *inst;
        GLuint negatebase[2] = { 0, 0 };
@@ -377,7 +259,7 @@ static void nqssadce_build_swizzle(struct nqssadce_state *s,
                GLuint swz = GET_SWZ(src.Swizzle, i);
                if (swz == SWIZZLE_NIL)
                        continue;
-               negatebase[GET_BIT(src.NegateBase, i)] |= 1 << i;
+               negatebase[GET_BIT(src.Negate, i)] |= 1 << i;
        }
 
        _mesa_insert_instructions(s->Program, s->IP, (negatebase[0] ? 1 : 0) + (negatebase[1] ? 1 : 0));
@@ -396,127 +278,6 @@ static void nqssadce_build_swizzle(struct nqssadce_state *s,
        }
 }
 
-static GLuint build_dtm(GLuint depthmode)
-{
-       switch(depthmode) {
-       default:
-       case GL_LUMINANCE: return 0;
-       case GL_INTENSITY: return 1;
-       case GL_ALPHA: return 2;
-       }
-}
-
-static GLuint build_func(GLuint comparefunc)
-{
-       return comparefunc - GL_NEVER;
-}
-
-
-/**
- * Collect all external state that is relevant for compiling the given
- * fragment program.
- */
-static void build_state(
-       r300ContextPtr r300,
-       struct r300_fragment_program *fp,
-       struct r300_fragment_program_external_state *state)
-{
-       int unit;
-
-       _mesa_bzero(state, sizeof(*state));
-
-       for(unit = 0; unit < 16; ++unit) {
-               if (fp->Base.Base.ShadowSamplers & (1 << unit)) {
-                       struct gl_texture_object* tex = r300->radeon.glCtx->Texture.Unit[unit]._Current;
-
-                       state->unit[unit].depth_texture_mode = build_dtm(tex->DepthMode);
-                       state->unit[unit].texture_compare_func = build_func(tex->CompareFunc);
-               }
-       }
-}
-
-static void dump_program(struct r500_fragment_program_code *code);
-
-void r500TranslateFragmentShader(GLcontext *ctx, struct gl_fragment_program *fp)
-{
-       r300ContextPtr r300 = R300_CONTEXT(ctx);
-       struct r300_fragment_program *r300_fp = (struct r300_fragment_program *)fp;
-       struct r300_fragment_program_external_state state;
-
-       build_state(r300, r300_fp, &state);
-       if (_mesa_memcmp(&r300_fp->state, &state, sizeof(state))) {
-               /* TODO: cache compiled programs */
-               r300_fp->translated = GL_FALSE;
-               _mesa_memcpy(&r300_fp->state, &state, sizeof(state));
-       }
-
-       if (!r300_fp->translated) {
-               struct r300_fragment_program_compiler compiler;
-
-               compiler.r300 = r300;
-               compiler.fp = r300_fp;
-               compiler.code = &r300_fp->code;
-               compiler.program = _mesa_clone_program(ctx, &fp->Base);
-
-               if (RADEON_DEBUG & DEBUG_PIXEL) {
-                       _mesa_printf("Compiler: Initial program:\n");
-                       _mesa_print_program(compiler.program);
-               }
-
-               insert_WPOS_trailer(&compiler);
-
-               struct radeon_program_transformation transformations[] = {
-                       { &transform_TEX, &compiler },
-                       { &radeonTransformALU, 0 },
-                       { &radeonTransformDeriv, 0 },
-                       { &radeonTransformTrigScale, 0 }
-               };
-               radeonLocalTransform(ctx, compiler.program, 4, transformations);
-
-               if (RADEON_DEBUG & DEBUG_PIXEL) {
-                       _mesa_printf("Compiler: after native rewrite:\n");
-                       _mesa_print_program(compiler.program);
-               }
-
-               struct radeon_nqssadce_descr nqssadce = {
-                       .Init = &nqssadce_init,
-                       .IsNativeSwizzle = &is_native_swizzle,
-                       .BuildSwizzle = &nqssadce_build_swizzle,
-                       .RewriteDepthOut = GL_TRUE
-               };
-               radeonNqssaDce(ctx, compiler.program, &nqssadce);
-
-               if (RADEON_DEBUG & DEBUG_PIXEL) {
-                       _mesa_printf("Compiler: after NqSSA-DCE:\n");
-                       _mesa_print_program(compiler.program);
-               }
-
-               if (!r500FragmentProgramEmit(&compiler))
-                       r300_fp->error = GL_TRUE;
-
-               r300_fp->translated = GL_TRUE;
-
-               /* Subtle: Rescue any parameters that have been added during transformations */
-               _mesa_free_parameter_list(fp->Base.Parameters);
-               fp->Base.Parameters = compiler.program->Parameters;
-               compiler.program->Parameters = 0;
-
-               _mesa_reference_program(ctx, &compiler.program, 0);
-
-               r300UpdateStateParameters(ctx, _NEW_PROGRAM);
-
-               if (RADEON_DEBUG & DEBUG_PIXEL) {
-                       if (!r300_fp->error) {
-                               _mesa_printf("Machine-readable code:\n");
-                               dump_program(&r300_fp->code.r500);
-                       }
-               }
-
-       }
-
-       update_params(ctx, fp);
-
-}
 
 static char *toswiz(int swiz_val) {
   switch(swiz_val) {
@@ -615,9 +376,9 @@ static char *to_texop(int val)
   return NULL;
 }
 
-static void dump_program(struct r500_fragment_program_code *code)
+void r500FragmentProgramDump(union rX00_fragment_program_code *c)
 {
-
+  struct r500_fragment_program_code *code = &c->r500;
   fprintf(stderr, "R500 Fragment Program:\n--------\n");
 
   int n;