Enable USER_BUFFERS, RADEON_VTXFMT_A and HW_VBOS on big endian by forcing all elts
authorBenjamin Herrenschmidt <benh@kernel.crashing.org>
Sun, 23 Apr 2006 05:54:06 +0000 (05:54 +0000)
committerBenjamin Herrenschmidt <benh@kernel.crashing.org>
Sun, 23 Apr 2006 05:54:06 +0000 (05:54 +0000)
to 32 bits (it would be possible to do a half-word swap for 16 bits if somebody is interested) and by using the input route mechanism to swap ubytes properly in r300EmitArrays. Tested with cva, varray and quake3.

src/mesa/drivers/dri/r300/r300_context.c
src/mesa/drivers/dri/r300/r300_context.h
src/mesa/drivers/dri/r300/r300_maos.c
src/mesa/drivers/dri/r300/radeon_vtxfmt_a.c

index 21a474bf675809dd568129ce7669509a6887cd1e..2dd6a6c3354b8d29d715432ce18aa2dc3b03fed4 100644 (file)
@@ -384,6 +384,7 @@ static void r300FreeGartAllocations(r300ContextPtr r300)
        resize_u_list(r300);
 #endif
 
+#ifdef USER_BUFFERS
     for (i = r300->rmm->u_last + 1; i > 0; i--) {
        if (r300->rmm->u_list[i].ptr == NULL) {
            continue;
@@ -417,6 +418,7 @@ static void r300FreeGartAllocations(r300ContextPtr r300)
        }
     }
     r300->rmm->u_head = i;
+#endif /* USER_BUFFERS */
 }
 
 /* Destroy the device specific context.
index 58f789eeae744ef0db0ec6b6ddb952c19afaea41..65bf9964df4f44fb1b5ebc695dd40227d38b5165 100644 (file)
@@ -47,11 +47,13 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "colormac.h"
 #include "radeon_context.h"
 
-/* PPC doesnt support 16 bit elts ... */
-#ifndef MESA_BIG_ENDIAN
 #define USER_BUFFERS
 #define RADEON_VTXFMT_A
 #define HW_VBOS
+
+/* We don't handle 16 bits elts swapping yet */
+#ifdef MESA_BIG_ENDIAN
+#define FORCE_32BITS_ELTS
 #endif
 
 //#define OPTIMIZE_ELTS
index 290ffb44eabbe2a1431d6f48093c4fc9323fb524..6b8365e6d9e6a58720599255a7a012d73ecd5ca1 100644 (file)
@@ -252,6 +252,43 @@ void r300EmitElts(GLcontext * ctx, void *elts, unsigned long n_elts, int elt_siz
        memcpy(out, elts, n_elts * elt_size);
 }
 
+       /* Mesa assumes that all missing components are from (0, 0, 0, 1) */
+#define ALL_COMPONENTS ((R300_INPUT_ROUTE_SELECT_X<<R300_INPUT_ROUTE_X_SHIFT) \
+               | (R300_INPUT_ROUTE_SELECT_Y<<R300_INPUT_ROUTE_Y_SHIFT) \
+               | (R300_INPUT_ROUTE_SELECT_Z<<R300_INPUT_ROUTE_Z_SHIFT) \
+               | (R300_INPUT_ROUTE_SELECT_W<<R300_INPUT_ROUTE_W_SHIFT))
+
+#define ALL_DEFAULT ((R300_INPUT_ROUTE_SELECT_ZERO<<R300_INPUT_ROUTE_X_SHIFT) \
+               | (R300_INPUT_ROUTE_SELECT_ZERO<<R300_INPUT_ROUTE_Y_SHIFT) \
+               | (R300_INPUT_ROUTE_SELECT_ZERO<<R300_INPUT_ROUTE_Z_SHIFT) \
+               | (R300_INPUT_ROUTE_SELECT_ONE<<R300_INPUT_ROUTE_W_SHIFT))
+
+
+static GLuint t_comps(GLuint aos_size) /* Build the X/Y/Z/W input-route selector word for an array that supplies aos_size components. */
+{
+       GLuint mask;
+       mask = (1 << (aos_size*3)) - 1; /* low aos_size*3 bits set */
+       return (ALL_COMPONENTS & mask) | (ALL_DEFAULT & ~mask); /* bits inside the mask come from ALL_COMPONENTS, the remaining bits from ALL_DEFAULT */
+}
+
+static GLuint fix_comps(GLuint dw, int fmt) /* Reverse the X/Y/Z/W selectors of route word dw for format 2 when MESA_BIG_ENDIAN is defined; otherwise return dw unchanged. */
+{      
+#ifdef MESA_BIG_ENDIAN
+       if (fmt == 2) { /* NOTE(review): format 2 is presumably the ubyte array format named in the commit message -- confirm */
+               GLuint dw_temp = 0;
+
+               dw_temp |= ((dw >> R300_INPUT_ROUTE_X_SHIFT) & R300_INPUT_ROUTE_SELECT_MASK) << R300_INPUT_ROUTE_W_SHIFT; /* X selector moves to the W slot */
+               dw_temp |= ((dw >> R300_INPUT_ROUTE_Y_SHIFT) & R300_INPUT_ROUTE_SELECT_MASK) << R300_INPUT_ROUTE_Z_SHIFT; /* Y selector moves to the Z slot */
+               dw_temp |= ((dw >> R300_INPUT_ROUTE_Z_SHIFT) & R300_INPUT_ROUTE_SELECT_MASK) << R300_INPUT_ROUTE_Y_SHIFT; /* Z selector moves to the Y slot */
+               dw_temp |= ((dw >> R300_INPUT_ROUTE_W_SHIFT) & R300_INPUT_ROUTE_SELECT_MASK) << R300_INPUT_ROUTE_X_SHIFT; /* W selector moves to the X slot */
+               
+               return dw_temp; /* only the four selector fields are carried over; any other bits of dw are dropped */
+       }
+#endif /* MESA_BIG_ENDIAN */
+       return dw; /* other formats, and builds without MESA_BIG_ENDIAN, pass through untouched */
+               
+}
+
 /* Emit vertex data to GART memory (unless immediate mode)
  * Route inputs to the vertex processor
  */
@@ -264,7 +301,7 @@ void r300EmitArrays(GLcontext * ctx, GLboolean immd)
        //struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
        GLuint nr = 0;
        GLuint count = VB->Count;
-       GLuint dw,mask;
+       GLuint dw;
        GLuint vic_1 = 0;       /* R300_VAP_INPUT_CNTL_1 */
        GLuint aa_vap_reg = 0; /* VAP register assignment */
        GLuint i;
@@ -469,17 +506,6 @@ void r300EmitArrays(GLcontext * ctx, GLboolean immd)
        ((drm_r300_cmd_header_t*)r300->hw.vir[0].cmd)->packet0.count = (nr+1)>>1;
 
 
-       /* Mesa assumes that all missing components are from (0, 0, 0, 1) */
-#define ALL_COMPONENTS ((R300_INPUT_ROUTE_SELECT_X<<R300_INPUT_ROUTE_X_SHIFT) \
-               | (R300_INPUT_ROUTE_SELECT_Y<<R300_INPUT_ROUTE_Y_SHIFT) \
-               | (R300_INPUT_ROUTE_SELECT_Z<<R300_INPUT_ROUTE_Z_SHIFT) \
-               | (R300_INPUT_ROUTE_SELECT_W<<R300_INPUT_ROUTE_W_SHIFT))
-
-#define ALL_DEFAULT ((R300_INPUT_ROUTE_SELECT_ZERO<<R300_INPUT_ROUTE_X_SHIFT) \
-               | (R300_INPUT_ROUTE_SELECT_ZERO<<R300_INPUT_ROUTE_Y_SHIFT) \
-               | (R300_INPUT_ROUTE_SELECT_ZERO<<R300_INPUT_ROUTE_Z_SHIFT) \
-               | (R300_INPUT_ROUTE_SELECT_ONE<<R300_INPUT_ROUTE_W_SHIFT))
-
        R300_STATECHANGE(r300, vir[1]);
 
        for(i=0; i < nr; i++)
@@ -488,33 +514,22 @@ void r300EmitArrays(GLcontext * ctx, GLboolean immd)
                        r300->state.aos[i].aos_size=/*3*/4; /* XXX */
                }
                
-               
-       for(i=0;i+1<nr;i+=2){
+       for (i=0;i+1<nr;i+=2) {
                /* do i first.. */
-               mask=(1<<(r300->state.aos[i].aos_size*3))-1;
-               dw=(ALL_COMPONENTS & mask)
-               | (ALL_DEFAULT & ~mask)
-               | R300_INPUT_ROUTE_ENABLE;
-
+               dw = fix_comps(t_comps(r300->state.aos[i].aos_size), r300->state.aos[i].aos_format) | R300_INPUT_ROUTE_ENABLE;
                /* i+1 */
-               mask=(1<<(r300->state.aos[i+1].aos_size*3))-1;
-               dw|=(
-               (ALL_COMPONENTS & mask)
-               | (ALL_DEFAULT & ~mask)
-               | R300_INPUT_ROUTE_ENABLE
-               )<<16;
-
+               dw |= (fix_comps(t_comps(r300->state.aos[i+1].aos_size), r300->state.aos[i+1].aos_format) | R300_INPUT_ROUTE_ENABLE) << 16;
+               
                //fprintf(stderr, "vir1 dw=%08x\n", dw);
                r300->hw.vir[1].cmd[R300_VIR_CNTL_0+(i>>1)]=dw;
-               }
-       if(nr & 1){
-               mask=(1<<(r300->state.aos[nr-1].aos_size*3))-1;
-               dw=(ALL_COMPONENTS & mask)
-               | (ALL_DEFAULT & ~mask)
-               | R300_INPUT_ROUTE_ENABLE;
-               r300->hw.vir[1].cmd[R300_VIR_CNTL_0+(nr>>1)]=dw;
+       }
+       if (nr & 1) {
+               dw = fix_comps(t_comps(r300->state.aos[nr-1].aos_size), r300->state.aos[nr-1].aos_format) | R300_INPUT_ROUTE_ENABLE;
+               
                //fprintf(stderr, "vir1 dw=%08x\n", dw);
-               }
+               r300->hw.vir[1].cmd[R300_VIR_CNTL_0+(nr>>1)]=dw;
+       }
+
        /* Set the rest of INPUT_ROUTE_1 to 0 */
        //for(i=((count+1)>>1); i<8; i++)r300->hw.vir[1].cmd[R300_VIR_CNTL_0+i]=0x0;
        ((drm_r300_cmd_header_t*)r300->hw.vir[1].cmd)->packet0.count = (nr+1)>>1;
index 8f7620482f344f9af005fdcba65299b8be905606..941ff0078e62a14fe8b9160053f00a44fc61c6b5 100644 (file)
@@ -216,14 +216,22 @@ static void radeonDrawElements( GLenum mode, GLsizei count, GLenum type, const G
                                max = ((unsigned char *)indices)[i];
                }
                
+#ifdef FORCE_32BITS_ELTS
+               elt_size = 4;
+#else
                elt_size = 2;
-               
+#endif         
                r300AllocDmaRegion(rmesa, &rvb, count * elt_size, elt_size);
                rvb.aos_offset = GET_START(&rvb);
                ptr = rvb.address + rvb.start;
                        
+#ifdef FORCE_32BITS_ELTS
+               for (i=0; i < count; i++)
+                       ((unsigned int *)ptr)[i] = ((unsigned char *)indices)[i] - min;
+#else
                for (i=0; i < count; i++)
                        ((unsigned short int *)ptr)[i] = ((unsigned char *)indices)[i] - min;
+#endif
        break;
                
        case GL_UNSIGNED_SHORT:
@@ -234,14 +242,23 @@ static void radeonDrawElements( GLenum mode, GLsizei count, GLenum type, const G
                                max = ((unsigned short int *)indices)[i];
                }
                
+#ifdef FORCE_32BITS_ELTS
+               elt_size = 4;
+#else
                elt_size = 2;
+#endif
                
                r300AllocDmaRegion(rmesa, &rvb, count * elt_size, elt_size);
                rvb.aos_offset = GET_START(&rvb);
                ptr = rvb.address + rvb.start;
                
+#ifdef FORCE_32BITS_ELTS
+               for (i=0; i < count; i++)
+                       ((unsigned int *)ptr)[i] = ((unsigned short int *)indices)[i] - min;
+#else
                for (i=0; i < count; i++)
                        ((unsigned short int *)ptr)[i] = ((unsigned short int *)indices)[i] - min;
+#endif
        break;
        
        case GL_UNSIGNED_INT:
@@ -252,17 +269,20 @@ static void radeonDrawElements( GLenum mode, GLsizei count, GLenum type, const G
                                max = ((unsigned int *)indices)[i];
                }
                
+#ifdef FORCE_32BITS_ELTS
+               elt_size = 4;
+#else
                if (max - min <= 65535)
                        elt_size = 2;
                else 
                        elt_size = 4;
-               
+#endif
                r300AllocDmaRegion(rmesa, &rvb, count * elt_size, elt_size);
                rvb.aos_offset = GET_START(&rvb);
                ptr = rvb.address + rvb.start;
                
                
-               if (max - min <= 65535)
+               if (elt_size == 2)
                        for (i=0; i < count; i++)
                                ((unsigned short int *)ptr)[i] = ((unsigned int *)indices)[i] - min;
                else
@@ -388,19 +408,30 @@ static void radeonDrawRangeElements(GLenum mode, GLuint min, GLuint max, GLsizei
        memset(&rvb, 0, sizeof(rvb));
        switch (type){
        case GL_UNSIGNED_BYTE:
+#ifdef FORCE_32BITS_ELTS
+               elt_size = 4;
+#else
                elt_size = 2;
-               
+#endif 
                r300AllocDmaRegion(rmesa, &rvb, count * elt_size, elt_size);
                rvb.aos_offset = GET_START(&rvb);
                ptr = rvb.address + rvb.start;
                
+#ifdef FORCE_32BITS_ELTS
+               for(i=0; i < count; i++)
+                       ((unsigned int *)ptr)[i] = ((unsigned char *)indices)[i] - min;
+#else
                for(i=0; i < count; i++)
                        ((unsigned short int *)ptr)[i] = ((unsigned char *)indices)[i] - min;
+#endif
        break;
        
        case GL_UNSIGNED_SHORT:
+#ifdef FORCE_32BITS_ELTS
+               elt_size = 4;
+#else
                elt_size = 2;
-               
+#endif 
 #ifdef OPTIMIZE_ELTS
                if (min == 0 && ctx->Array.ElementArrayBufferObj->Name){
                        ptr = indices;
@@ -411,21 +442,29 @@ static void radeonDrawRangeElements(GLenum mode, GLuint min, GLuint max, GLsizei
                rvb.aos_offset = GET_START(&rvb);
                ptr = rvb.address + rvb.start;
 
+#ifdef FORCE_32BITS_ELTS
+               for(i=0; i < count; i++)
+                       ((unsigned int *)ptr)[i] = ((unsigned short int *)indices)[i] - min;
+#else
                for(i=0; i < count; i++)
                        ((unsigned short int *)ptr)[i] = ((unsigned short int *)indices)[i] - min;
+#endif
        break;
        
        case GL_UNSIGNED_INT:
+#ifdef FORCE_32BITS_ELTS
+               elt_size = 4;
+#else
                if (max - min <= 65535)
                        elt_size = 2;
                else 
                        elt_size = 4;
-               
+#endif 
                r300AllocDmaRegion(rmesa, &rvb, count * elt_size, elt_size);
                rvb.aos_offset = GET_START(&rvb);
                ptr = rvb.address + rvb.start;
                
-               if (max - min <= 65535)
+               if (elt_size == 2)
                        for (i=0; i < count; i++)
                                ((unsigned short int *)ptr)[i] = ((unsigned int *)indices)[i] - min;
                else