Improving Vladimir's alpha test fix a bit, as it turns out r300Enable didn't correctly...
[mesa.git] / src / mesa / drivers / dri / r300 / r300_vertexprog.c
index 24556f940860adfdb333f127c99d76d539185ddc..02e0b5b7391cf7987c20d10a6c67a1dc578de6cd 100644 (file)
@@ -39,6 +39,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
 
 #define SCALAR_FLAG (1<<31)
 #define FLAG_MASK (1<<31)
+#define OP_MASK        (0xf)  /* we are unlikely to have more than 15 */
 #define OPN(operator, ip, op) {#operator, VP_OPCODE_##operator, ip, op}
 
 struct{
@@ -249,7 +250,10 @@ void r300VertexProgUpdateParams(GLcontext *ctx, struct r300_vertex_program *vp)
        _mesa_load_state_parameters(ctx, mesa_vp->Parameters);
        
        //debug_vp(ctx, mesa_vp);
-       
+       if(mesa_vp->Parameters->NumParameters * 4 > VSF_MAX_FRAGMENT_LENGTH){
+               fprintf(stderr, "%s:Params exhausted\n", __FUNCTION__);
+               exit(-1);
+       }
        dst_index=0;
        for(pi=0; pi < mesa_vp->Parameters->NumParameters; pi++){
                switch(mesa_vp->Parameters->Parameters[pi].Type){
@@ -348,7 +352,21 @@ static unsigned long t_swizzle(GLubyte swizzle)
                        exit(0);
        }
 }
-               
+
+void vp_dump_inputs(struct r300_vertex_program *vp, char *caller)
+{
+       int i;
+       
+       if(vp == NULL)
+               return ;
+       
+       fprintf(stderr, "%s:<", caller);
+       for(i=0; i < VERT_ATTRIB_MAX; i++)
+               fprintf(stderr, "%d ", vp->inputs[i]);
+       fprintf(stderr, ">\n");
+       
+}
+
 static unsigned long t_src_index(struct r300_vertex_program *vp, struct vp_src_register *src)
 {
        int i;
@@ -365,7 +383,6 @@ static unsigned long t_src_index(struct r300_vertex_program *vp, struct vp_src_r
                        
                default: printf("unknown input index %d\n", src->Index); exit(0); break;
                }*/
-                               
                if(vp->inputs[src->Index] != -1)
                        return vp->inputs[src->Index];
                
@@ -375,6 +392,8 @@ static unsigned long t_src_index(struct r300_vertex_program *vp, struct vp_src_r
                
                vp->inputs[src->Index]=max_reg+1;
                
+               //vp_dump_inputs(vp, __FUNCTION__);     
+               
                return vp->inputs[src->Index];
        }else{
                return src->Index;
@@ -430,13 +449,21 @@ static unsigned long op_operands(enum vp_opcode opcode)
        /* Can we trust mesas opcodes to be in order ? */
        for(i=0; i < sizeof(op_names) / sizeof(*op_names); i++)
                if(op_names[i].opcode == opcode)
-                       return op_names[i].ip;
+                       return op_names[i].ip & OP_MASK;
        
        fprintf(stderr, "op %d not found in op_names\n", opcode);
        exit(-1);
        return 0;
 }
-               
+
+/* TODO: Get rid of t_src_class call */
+#define CMP_SRCS(a, b) (a.Index != b.Index && \
+                      ((t_src_class(a.File) == VSF_IN_CLASS_PARAM && \
+                        t_src_class(b.File) == VSF_IN_CLASS_PARAM) || \
+                       (t_src_class(a.File) == VSF_IN_CLASS_ATTR && \
+                        t_src_class(b.File) == VSF_IN_CLASS_ATTR))) \
+                        
+#define SRCS_WRITABLE 1
 static void translate_program(struct r300_vertex_program *vp)
 {
        struct vertex_program *mesa_vp=(void *)vp;
@@ -445,8 +472,12 @@ static void translate_program(struct r300_vertex_program *vp)
        VERTEX_SHADER_INSTRUCTION t2rs[1024];
        VERTEX_SHADER_INSTRUCTION *o_inst;
        unsigned long operands;
-       int u_temp_i=63; /* Initial value should be last tmp reg that we can use */
-                       
+       int u_temp_i=63; /* Initial value should be last tmp reg that hw supports */
+#ifdef SRCS_WRITABLE
+       struct vp_src_register src[3];
+#else  
+#define src    vpi->SrcReg     
+#endif                 
        vp->t2rs=0;
        vp->program.length=0;
        vp->num_temporaries=mesa_vp->Base.NumTemporaries;
@@ -459,74 +490,134 @@ static void translate_program(struct r300_vertex_program *vp)
                
                operands=op_operands(vpi->Opcode);
                
+               for(i=0; i < operands; i++)
+                       src[i]=vpi->SrcReg[i];
+#if 1
+               if(operands == 3){ /* TODO: scalars */
+                       if( CMP_SRCS(src[1], src[2]) || CMP_SRCS(src[0], src[2]) ){
+                               o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, u_temp_i,
+                                               VSF_FLAG_ALL, VSF_OUT_CLASS_TMP);
+                               
+                               o_inst->src1=MAKE_VSF_SOURCE(t_src_index(vp, &src[2]),
+                                               SWIZZLE_X, SWIZZLE_Y,
+                                               SWIZZLE_Z, SWIZZLE_W,
+                                               t_src_class(src[0].File), VSF_FLAG_NONE);
+
+                               o_inst->src2=MAKE_VSF_SOURCE(t_src_index(vp, &src[2]),
+                                               SWIZZLE_ZERO, SWIZZLE_ZERO,
+                                               SWIZZLE_ZERO, SWIZZLE_ZERO,
+                                               t_src_class(src[0].File), VSF_FLAG_NONE);
+                               o_inst->src3=0;
+                               o_inst++;
+                                               
+                               src[2].File=PROGRAM_TEMPORARY;
+                               src[2].Index=u_temp_i;
+                               u_temp_i--;
+                       }
+                       
+               }
+               if(operands >= 2){
+                       if( CMP_SRCS(src[1], src[0]) ){
+                               o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, u_temp_i,
+                                               VSF_FLAG_ALL, VSF_OUT_CLASS_TMP);
+                               
+                               o_inst->src1=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
+                                               SWIZZLE_X, SWIZZLE_Y,
+                                               SWIZZLE_Z, SWIZZLE_W,
+                                               t_src_class(src[0].File), VSF_FLAG_NONE);
+
+                               o_inst->src2=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
+                                               SWIZZLE_ZERO, SWIZZLE_ZERO,
+                                               SWIZZLE_ZERO, SWIZZLE_ZERO,
+                                               t_src_class(src[0].File), VSF_FLAG_NONE);
+                               o_inst->src3=0;
+                               o_inst++;
+                                               
+                               src[0].File=PROGRAM_TEMPORARY;
+                               src[0].Index=u_temp_i;
+                               u_temp_i--;
+                       }
+               }
+#endif         
                /* these ops need special handling.
                   Ops that need temp vars should probably be given reg indexes starting at the end of tmp area. */
                switch(vpi->Opcode){
                case VP_OPCODE_MOV://ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO} 
                        o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, vpi->DstReg.Index,
                                        t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
-                       o_inst->src1=t_src(vp, &vpi->SrcReg[0]);
-                       o_inst->src2=MAKE_VSF_SOURCE(t_src_index(vp, &vpi->SrcReg[0]),
+                       o_inst->src1=t_src(vp, &src[0]);
+                       o_inst->src2=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
                                        SWIZZLE_ZERO, SWIZZLE_ZERO,
                                        SWIZZLE_ZERO, SWIZZLE_ZERO,
-                                       t_src_class(vpi->SrcReg[0].File), VSF_FLAG_NONE);
+                                       t_src_class(src[0].File), VSF_FLAG_NONE);
 
                        o_inst->src3=0;
                        goto next;
-                       
+                                               
                case VP_OPCODE_DP3://DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ZERO} PARAM 0{} {X Y Z ZERO} 
                        o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_DOT, vpi->DstReg.Index,
                                        t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
                        
-                       o_inst->src1=MAKE_VSF_SOURCE(t_src_index(vp, &vpi->SrcReg[0]),
-                                       t_swizzle(vpi->SrcReg[0].Swizzle[0]),
-                                       t_swizzle(vpi->SrcReg[0].Swizzle[1]),
-                                       t_swizzle(vpi->SrcReg[0].Swizzle[2]),
+                       o_inst->src1=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
+                                       t_swizzle(src[0].Swizzle[0]),
+                                       t_swizzle(src[0].Swizzle[1]),
+                                       t_swizzle(src[0].Swizzle[2]),
                                        SWIZZLE_ZERO,
-                                       t_src_class(vpi->SrcReg[0].File),
-                                       vpi->SrcReg[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE);
+                                       t_src_class(src[0].File),
+                                       src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE);
                        
-                       o_inst->src2=MAKE_VSF_SOURCE(t_src_index(vp, &vpi->SrcReg[1]),
-                                       t_swizzle(vpi->SrcReg[1].Swizzle[0]),
-                                       t_swizzle(vpi->SrcReg[1].Swizzle[1]),
-                                       t_swizzle(vpi->SrcReg[1].Swizzle[2]),
+                       o_inst->src2=MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
+                                       t_swizzle(src[1].Swizzle[0]),
+                                       t_swizzle(src[1].Swizzle[1]),
+                                       t_swizzle(src[1].Swizzle[2]),
                                        SWIZZLE_ZERO,
-                                       t_src_class(vpi->SrcReg[1].File),
-                                       vpi->SrcReg[1].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE);
+                                       t_src_class(src[1].File),
+                                       src[1].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE);
 
                        o_inst->src3=0;
                        goto next;
-                       
+
                case VP_OPCODE_SUB://ADD RESULT 1.X Y Z W TMP 0{} {X Y Z W} PARAM 1{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
+#ifdef SRCS_WRITABLE
+                       vpi->Opcode=VP_OPCODE_ADD;
+                       src[1].Negate=!src[1].Negate;
+                       break;
+#else
                        o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, vpi->DstReg.Index,
                                        t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
                        
-                       o_inst->src1=t_src(vp, &vpi->SrcReg[0]);
-                       o_inst->src2=MAKE_VSF_SOURCE(t_src_index(vp, &vpi->SrcReg[1]),
-                                       t_swizzle(vpi->SrcReg[1].Swizzle[0]),
-                                       t_swizzle(vpi->SrcReg[1].Swizzle[1]),
-                                       t_swizzle(vpi->SrcReg[1].Swizzle[2]),
-                                       t_swizzle(vpi->SrcReg[1].Swizzle[3]),
-                                       t_src_class(vpi->SrcReg[1].File),
-                                       (!vpi->SrcReg[1].Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE);
+                       o_inst->src1=t_src(vp, &src[0]);
+                       o_inst->src2=MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
+                                       t_swizzle(src[1].Swizzle[0]),
+                                       t_swizzle(src[1].Swizzle[1]),
+                                       t_swizzle(src[1].Swizzle[2]),
+                                       t_swizzle(src[1].Swizzle[3]),
+                                       t_src_class(src[1].File),
+                                       (!src[1].Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE);
                        o_inst->src3=0;
                        goto next;
-                       
+#endif                                         
                case VP_OPCODE_ABS://MAX RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
+#ifdef SRCS_WRITABLE
+                       vpi->Opcode=VP_OPCODE_MAX;
+                       src[1]=src[0];
+                       src[1].Negate=GL_TRUE;
+                       break;
+#else
                        o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_MAX, vpi->DstReg.Index,
                                        t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
                        
-                       o_inst->src1=t_src(vp, &vpi->SrcReg[0]);
-                       o_inst->src2=MAKE_VSF_SOURCE(t_src_index(vp, &vpi->SrcReg[0]),
-                                       t_swizzle(vpi->SrcReg[0].Swizzle[0]),
-                                       t_swizzle(vpi->SrcReg[0].Swizzle[1]),
-                                       t_swizzle(vpi->SrcReg[0].Swizzle[2]),
-                                       t_swizzle(vpi->SrcReg[0].Swizzle[3]),
-                                       t_src_class(vpi->SrcReg[0].File),
+                       o_inst->src1=t_src(vp, &src[0]);
+                       o_inst->src2=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
+                                       t_swizzle(src[0].Swizzle[0]),
+                                       t_swizzle(src[0].Swizzle[1]),
+                                       t_swizzle(src[0].Swizzle[2]),
+                                       t_swizzle(src[0].Swizzle[3]),
+                                       t_src_class(src[0].File),
                                        VSF_FLAG_ALL);
                        o_inst->src3=0;
                        goto next;
-                       
+#endif                                         
                case VP_OPCODE_FLR:
                /* FRC TMP 0.X Y Z W PARAM 0{} {X Y Z W} 
                   ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} TMP 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W */
@@ -534,7 +625,7 @@ static void translate_program(struct r300_vertex_program *vp)
                        o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_FRC, u_temp_i,
                                        t_dst_mask(vpi->DstReg.WriteMask), VSF_OUT_CLASS_TMP);
                        
-                       o_inst->src1=t_src(vp, &vpi->SrcReg[0]);
+                       o_inst->src1=t_src(vp, &src[0]);
                        o_inst->src2=0;
                        o_inst->src3=0;
                        o_inst++;
@@ -542,7 +633,7 @@ static void translate_program(struct r300_vertex_program *vp)
                        o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, vpi->DstReg.Index,
                                        t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
                        
-                       o_inst->src1=t_src(vp, &vpi->SrcReg[0]);
+                       o_inst->src1=t_src(vp, &src[0]);
                        o_inst->src2=MAKE_VSF_SOURCE(u_temp_i,
                                        VSF_IN_COMPONENT_X,
                                        VSF_IN_COMPONENT_Y,
@@ -550,7 +641,7 @@ static void translate_program(struct r300_vertex_program *vp)
                                        VSF_IN_COMPONENT_W,
                                        VSF_IN_CLASS_TMP,
                                        /* Not 100% sure about this */
-                                       (!vpi->SrcReg[1].Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE/*VSF_FLAG_ALL*/);
+                                       (!src[1].Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE/*VSF_FLAG_ALL*/);
 
                        o_inst->src3=0;
                        u_temp_i--;
@@ -560,14 +651,14 @@ static void translate_program(struct r300_vertex_program *vp)
                        o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_DOT, vpi->DstReg.Index,
                                        t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
                        
-                       o_inst->src1=MAKE_VSF_SOURCE(t_src_index(vp, &vpi->SrcReg[0]),
-                                       t_swizzle(vpi->SrcReg[0].Swizzle[0]),
-                                       t_swizzle(vpi->SrcReg[0].Swizzle[1]),
-                                       t_swizzle(vpi->SrcReg[0].Swizzle[2]),
+                       o_inst->src1=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
+                                       t_swizzle(src[0].Swizzle[0]),
+                                       t_swizzle(src[0].Swizzle[1]),
+                                       t_swizzle(src[0].Swizzle[2]),
                                        VSF_IN_COMPONENT_ONE,
-                                       t_src_class(vpi->SrcReg[0].File),
-                                       vpi->SrcReg[1].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE);
-                       o_inst->src2=t_src(vp, &vpi->SrcReg[1]);
+                                       t_src_class(src[0].File),
+                                       src[1].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE);
+                       o_inst->src2=t_src(vp, &src[1]);
                        o_inst->src3=0;
                        goto next;
                        
@@ -579,11 +670,11 @@ static void translate_program(struct r300_vertex_program *vp)
                        o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, u_temp_i,
                                        t_dst_mask(vpi->DstReg.WriteMask), VSF_OUT_CLASS_TMP);
                        
-                       o_inst->src1=t_src(vp, &vpi->SrcReg[0]);
-                       o_inst->src2=MAKE_VSF_SOURCE(t_src_index(vp, &vpi->SrcReg[0]),
+                       o_inst->src1=t_src(vp, &src[0]);
+                       o_inst->src2=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
                                        SWIZZLE_ZERO, SWIZZLE_ZERO,
                                        SWIZZLE_ZERO, SWIZZLE_ZERO,
-                                       t_src_class(vpi->SrcReg[0].File), VSF_FLAG_NONE);
+                                       t_src_class(src[0].File), VSF_FLAG_NONE);
                        o_inst->src3=0;
                        o_inst++;
                        u_temp_i--;
@@ -598,8 +689,8 @@ static void translate_program(struct r300_vertex_program *vp)
                                        SWIZZLE_ZERO,
                                        VSF_IN_CLASS_TMP,
                                        /* Not 100% sure about this */
-                                       (!vpi->SrcReg[1].Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE/*VSF_FLAG_ALL*/);
-                       o_inst->src2=t_src(vp, &vpi->SrcReg[1]);
+                                       (!src[1].Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE/*VSF_FLAG_ALL*/);
+                       o_inst->src2=t_src(vp, &src[1]);
                        o_inst->src3=0;
                        u_temp_i--;
                        o_inst++;
@@ -614,7 +705,7 @@ static void translate_program(struct r300_vertex_program *vp)
                                        SWIZZLE_ONE,
                                        VSF_IN_CLASS_TMP,
                                        /* Not 100% sure about this */
-                                       vpi->SrcReg[1].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE/*VSF_FLAG_ALL*/);
+                                       src[1].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE/*VSF_FLAG_ALL*/);
                        
                        o_inst->src2=MAKE_VSF_SOURCE(u_temp_i+1,
                                        VSF_IN_COMPONENT_X,
@@ -623,10 +714,10 @@ static void translate_program(struct r300_vertex_program *vp)
                                        VSF_IN_COMPONENT_W,
                                        VSF_IN_CLASS_TMP,
                                        /* Not 100% sure about this */
-                                       (!vpi->SrcReg[1].Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE/*VSF_FLAG_ALL*/);
+                                       (!src[1].Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE/*VSF_FLAG_ALL*/);
                        o_inst->src3=0;
                        goto next;
-                       
+
                case VP_OPCODE_ARL:
                case VP_OPCODE_SWZ:
                case VP_OPCODE_RCC:
@@ -646,21 +737,21 @@ static void translate_program(struct r300_vertex_program *vp)
 
                switch(operands){
                        case 1:
-                               o_inst->src1=t_src(vp, &vpi->SrcReg[0]);
+                               o_inst->src1=t_src(vp, &src[0]);
                                o_inst->src2=0;
                                o_inst->src3=0;
                        break;
                        
                        case 2:
-                               o_inst->src1=t_src(vp, &vpi->SrcReg[0]);
-                               o_inst->src2=t_src(vp, &vpi->SrcReg[1]);
+                               o_inst->src1=t_src(vp, &src[0]);
+                               o_inst->src2=t_src(vp, &src[1]);
                                o_inst->src3=0;
                        break;
                        
                        case 3:
-                               o_inst->src1=t_src(vp, &vpi->SrcReg[0]);
-                               o_inst->src2=t_src(vp, &vpi->SrcReg[1]);
-                               o_inst->src3=t_src(vp, &vpi->SrcReg[2]);
+                               o_inst->src1=t_src(vp, &src[0]);
+                               o_inst->src2=t_src(vp, &src[1]);
+                               o_inst->src3=t_src(vp, &src[2]);
                        break;
                        
                        default:
@@ -668,24 +759,24 @@ static void translate_program(struct r300_vertex_program *vp)
                                exit(-1);
                        break;
                }
-               next:
-                               
+               next: ;
+#if 0                          
                /* If instruction writes to result and one of the inputs is tmp, we move it at the end of program */
                if(vpi->DstReg.File == PROGRAM_OUTPUT){
                        for(operand_index=0; operand_index < operands; operand_index++)
-                               if(vpi->SrcReg[operand_index].File == PROGRAM_TEMPORARY){
+                               if(src[operand_index].File == PROGRAM_TEMPORARY){
                                        t2rs[vp->t2rs++]=*o_inst;
                                        o_inst--; /* FIXME */
                                        break;
                                }
                }
-               
+#endif         
        }
-       
+#if 0  
        /* Put "tmp to result" instructions in */
        for(i=0; i < vp->t2rs; i++, o_inst++)
                *o_inst=t2rs[i];
-               
+#endif         
        vp->program.length=(o_inst - vp->program.body.i) * 4;
        
        if(u_temp_i < vp->num_temporaries)